/*
 * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>

#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>

#include "iwcm.h"

MODULE_AUTHOR("Tom Tucker");
MODULE_DESCRIPTION("iWARP CM");
MODULE_LICENSE("Dual BSD/GPL");

static struct workqueue_struct *iwcm_wq;
struct iwcm_work {
        struct work_struct work;
        struct iwcm_id_private *cm_id;
        struct list_head list;
        struct iw_cm_event event;
        struct list_head free_list;
};

static unsigned int default_backlog = 256;

static struct ctl_table_header *iwcm_ctl_table_hdr;
static struct ctl_table iwcm_ctl_table[] = {
        {
                .procname     = "default_backlog",
                .data         = &default_backlog,
                .maxlen       = sizeof(default_backlog),
                .mode         = 0644,
                .proc_handler = proc_dointvec,
        },
        { }
};

/*
 * The following services provide a mechanism for pre-allocating iwcm_work
 * elements.  The design pre-allocates them based on the cm_id type:
 *      LISTENING IDS:  Get enough elements preallocated to handle the
 *                      listen backlog.
 *      ACTIVE IDS:     4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
 *      PASSIVE IDS:    3: ESTABLISHED, DISCONNECT, CLOSE
 *
 * Allocating them in connect and listen avoids having to deal
 * with allocation failures on the event upcall from the provider (which
 * is called in interrupt context).
 *
 * One exception is when creating the cm_id for incoming connection requests.
 * There are two cases:
 * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
 *    the backlog is exceeded, then no more connection request events will
 *    be processed.  cm_event_handler() returns -ENOMEM in this case.  It's up
 *    to the provider to reject the connection request.
 * 2) in the connection request workqueue handler, cm_conn_req_handler().
 *    If work elements cannot be allocated for the new connect request cm_id,
 *    then the IWCM will call the provider reject method.  This is ok since
 *    cm_conn_req_handler() runs in the workqueue thread context.
 */

static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
{
        struct iwcm_work *work;

        if (list_empty(&cm_id_priv->work_free_list))
                return NULL;
        work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
                          free_list);
        list_del_init(&work->free_list);
        return work;
}

static void put_work(struct iwcm_work *work)
{
        list_add(&work->free_list, &work->cm_id->work_free_list);
}

static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
{
        struct list_head *e, *tmp;

        list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
                kfree(list_entry(e, struct iwcm_work, free_list));
}

static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
{
        struct iwcm_work *work;

        BUG_ON(!list_empty(&cm_id_priv->work_free_list));
        while (count--) {
                work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
                if (!work) {
                        dealloc_work_entries(cm_id_priv);
                        return -ENOMEM;
                }
                work->cm_id = cm_id_priv;
                INIT_LIST_HEAD(&work->list);
                put_work(work);
        }
        return 0;
}

/*
 * Save private data from incoming connection requests to
 * iw_cm_event, so the low level driver doesn't have to.  Adjust
 * the event ptr to point to the local copy.
 */
static int copy_private_data(struct iw_cm_event *event)
{
        void *p;

        p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC);
        if (!p)
                return -ENOMEM;
        event->private_data = p;
        return 0;
}

static void free_cm_id(struct iwcm_id_private *cm_id_priv)
{
        dealloc_work_entries(cm_id_priv);
        kfree(cm_id_priv);
}

/*
 * Release a reference on cm_id.  If the last reference is being
 * released, enable the waiting thread (in iw_destroy_cm_id) to
 * get woken up, and return 1 if a thread is already waiting.
 */
static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
        BUG_ON(atomic_read(&cm_id_priv->refcount) == 0);
        if (atomic_dec_and_test(&cm_id_priv->refcount)) {
                BUG_ON(!list_empty(&cm_id_priv->work_list));
                complete(&cm_id_priv->destroy_comp);
                return 1;
        }

        return 0;
}

static void add_ref(struct iw_cm_id *cm_id)
{
        struct iwcm_id_private *cm_id_priv;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        atomic_inc(&cm_id_priv->refcount);
}

static void rem_ref(struct iw_cm_id *cm_id)
{
        struct iwcm_id_private *cm_id_priv;
        int cb_destroy;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

        /*
         * Test the bit before dropping the reference in case the cm_id
         * gets freed on another thread.
         */
        cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
        if (iwcm_deref_id(cm_id_priv) && cb_destroy) {
                BUG_ON(!list_empty(&cm_id_priv->work_list));
                free_cm_id(cm_id_priv);
        }
}

static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);

struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
                                 iw_cm_handler cm_handler,
                                 void *context)
{
        struct iwcm_id_private *cm_id_priv;

        cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
        if (!cm_id_priv)
                return ERR_PTR(-ENOMEM);

        cm_id_priv->state = IW_CM_STATE_IDLE;
        cm_id_priv->id.device = device;
        cm_id_priv->id.cm_handler = cm_handler;
        cm_id_priv->id.context = context;
        cm_id_priv->id.event_handler = cm_event_handler;
        cm_id_priv->id.add_ref = add_ref;
        cm_id_priv->id.rem_ref = rem_ref;
        spin_lock_init(&cm_id_priv->lock);
        atomic_set(&cm_id_priv->refcount, 1);
        init_waitqueue_head(&cm_id_priv->connect_wait);
        init_completion(&cm_id_priv->destroy_comp);
        INIT_LIST_HEAD(&cm_id_priv->work_list);
        INIT_LIST_HEAD(&cm_id_priv->work_free_list);

        return &cm_id_priv->id;
}
EXPORT_SYMBOL(iw_create_cm_id);
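
/*
 * Example (illustrative sketch only, not part of the IWCM itself): a
 * consumer creates a cm_id and supplies an event handler.  The handler,
 * context structure and device pointer names below are hypothetical.
 *
 *      static int my_cm_handler(struct iw_cm_id *cm_id,
 *                               struct iw_cm_event *event)
 *      {
 *              struct my_conn *conn = cm_id->context;
 *
 *              if (event->event == IW_CM_EVENT_ESTABLISHED)
 *                      complete(&conn->established);
 *              return 0;
 *      }
 *
 *      cm_id = iw_create_cm_id(ib_dev, my_cm_handler, conn);
 *      if (IS_ERR(cm_id))
 *              return PTR_ERR(cm_id);
 *
 * Returning non-zero from the handler tells the IWCM to destroy the cm_id
 * on the consumer's behalf (see cm_work_handler() below); the consumer must
 * then not call iw_destroy_cm_id() itself.
 */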

static int iwcm_modify_qp_err(struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;

        if (!qp)
                return -EINVAL;

        qp_attr.qp_state = IB_QPS_ERR;
        return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
}

/*
 * This is really the RDMAC CLOSING state.  It is most similar to the
 * IB SQD QP state.
 */
static int iwcm_modify_qp_sqd(struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;

        BUG_ON(qp == NULL);
        qp_attr.qp_state = IB_QPS_SQD;
        return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
}

/*
 * CM_ID <-- CLOSING
 *
 * Block if a passive or active connection is currently being processed.  Then
 * process the event as follows:
 * - If we are ESTABLISHED, move to CLOSING and modify the QP state
 *   based on the abrupt flag.
 * - If the connection is already in the CLOSING or IDLE state, the peer is
 *   disconnecting concurrently with us and we've already seen the
 *   DISCONNECT event -- ignore the request and return 0.
 * - Disconnect on a listening endpoint returns -EINVAL.
 */
int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
{
        struct iwcm_id_private *cm_id_priv;
        unsigned long flags;
        int ret = 0;
        struct ib_qp *qp = NULL;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        /* Wait if we're currently in a connect or accept downcall */
        wait_event(cm_id_priv->connect_wait,
                   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->state) {
        case IW_CM_STATE_ESTABLISHED:
                cm_id_priv->state = IW_CM_STATE_CLOSING;

                /* QP could be NULL for a user-mode client */
                if (cm_id_priv->qp)
                        qp = cm_id_priv->qp;
                else
                        ret = -EINVAL;
                break;
        case IW_CM_STATE_LISTEN:
                ret = -EINVAL;
                break;
        case IW_CM_STATE_CLOSING:
                /* remote peer closed first */
        case IW_CM_STATE_IDLE:
                /* accept or connect returned !0 */
                break;
        case IW_CM_STATE_CONN_RECV:
                /*
                 * App called disconnect before/without calling accept after
                 * the connect_request event was delivered.
                 */
                break;
        case IW_CM_STATE_CONN_SENT:
                /* Can only get here if the wait above fails */
        default:
                BUG();
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        if (qp) {
                if (abrupt)
                        ret = iwcm_modify_qp_err(qp);
                else
                        ret = iwcm_modify_qp_sqd(qp);

                /*
                 * If both sides are disconnecting the QP could
                 * already be in the ERR or SQD state.
                 */
                ret = 0;
        }

        return ret;
}
EXPORT_SYMBOL(iw_cm_disconnect);

/*
 * CM_ID <-- DESTROYING
 *
 * Clean up all resources associated with the connection and release
 * the initial reference taken by iw_create_cm_id.
 */
static void destroy_cm_id(struct iw_cm_id *cm_id)
{
        struct iwcm_id_private *cm_id_priv;
        unsigned long flags;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        /*
         * Wait if we're currently in a connect or accept downcall.  A
         * listening endpoint should never block here.
         */
        wait_event(cm_id_priv->connect_wait,
                   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->state) {
        case IW_CM_STATE_LISTEN:
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                /* destroy the listening endpoint */
                cm_id->device->iwcm->destroy_listen(cm_id);
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                break;
        case IW_CM_STATE_ESTABLISHED:
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                /* Abrupt close of the connection */
                (void)iwcm_modify_qp_err(cm_id_priv->qp);
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                break;
        case IW_CM_STATE_IDLE:
        case IW_CM_STATE_CLOSING:
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
                break;
        case IW_CM_STATE_CONN_RECV:
                /*
                 * App called destroy before/without calling accept after
                 * receiving the connection request event notification, or
                 * returned non-zero from the event callback function.
                 * In either case, we must tell the provider to reject.
                 */
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                cm_id->device->iwcm->reject(cm_id, NULL, 0);
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                break;
        case IW_CM_STATE_CONN_SENT:
        case IW_CM_STATE_DESTROYING:
        default:
                BUG();
                break;
        }
        if (cm_id_priv->qp) {
                cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
                cm_id_priv->qp = NULL;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        (void)iwcm_deref_id(cm_id_priv);
}

/*
 * This function is only called by the application thread and cannot
 * be called by the event thread.  The function will wait for all
 * references to the cm_id to be released and then kfree the cm_id
 * object.
 */
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
        struct iwcm_id_private *cm_id_priv;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));

        destroy_cm_id(cm_id);

        wait_for_completion(&cm_id_priv->destroy_comp);

        free_cm_id(cm_id_priv);
}
EXPORT_SYMBOL(iw_destroy_cm_id);

/*
 * CM_ID <-- LISTEN
 *
 * Start listening for connect requests.  Generates one CONNECT_REQUEST
 * event for each inbound connect request.
 */
int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
{
        struct iwcm_id_private *cm_id_priv;
        unsigned long flags;
        int ret;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

        if (!backlog)
                backlog = default_backlog;

        ret = alloc_work_entries(cm_id_priv, backlog);
        if (ret)
                return ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->state) {
        case IW_CM_STATE_IDLE:
                cm_id_priv->state = IW_CM_STATE_LISTEN;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
                if (ret)
                        cm_id_priv->state = IW_CM_STATE_IDLE;
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                break;
        default:
                ret = -EINVAL;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        return ret;
}
EXPORT_SYMBOL(iw_cm_listen);
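
/*
 * Example (illustrative sketch only; handler, device and context names are
 * hypothetical): a passive-side consumer starts listening.  Passing a
 * backlog of 0 selects default_backlog, which is tunable through the
 * net/iw_cm/default_backlog sysctl registered in iw_cm_init() below.
 *
 *      listen_id = iw_create_cm_id(ib_dev, my_listen_handler, my_ctx);
 *      if (IS_ERR(listen_id))
 *              return PTR_ERR(listen_id);
 *      ret = iw_cm_listen(listen_id, 0);
 *      if (ret)
 *              iw_destroy_cm_id(listen_id);
 *
 * Each inbound connect request is later delivered to my_listen_handler()
 * as an IW_CM_EVENT_CONNECT_REQUEST on a new cm_id created by
 * cm_conn_req_handler(), which the handler answers with iw_cm_accept()
 * or iw_cm_reject().
 */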

/*
 * CM_ID <-- IDLE
 *
 * Rejects an inbound connection request.  No events are generated.
 */
int iw_cm_reject(struct iw_cm_id *cm_id,
                 const void *private_data,
                 u8 private_data_len)
{
        struct iwcm_id_private *cm_id_priv;
        unsigned long flags;
        int ret;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
                return -EINVAL;
        }
        cm_id_priv->state = IW_CM_STATE_IDLE;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        ret = cm_id->device->iwcm->reject(cm_id, private_data,
                                          private_data_len);

        clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
        wake_up_all(&cm_id_priv->connect_wait);

        return ret;
}
EXPORT_SYMBOL(iw_cm_reject);

/*
 * CM_ID <-- ESTABLISHED
 *
 * Accepts an inbound connection request and generates an ESTABLISHED
 * event.  Callers of iw_cm_disconnect and iw_destroy_cm_id will block
 * until the ESTABLISHED event is received from the provider.
 */
int iw_cm_accept(struct iw_cm_id *cm_id,
                 struct iw_cm_conn_param *iw_param)
{
        struct iwcm_id_private *cm_id_priv;
        struct ib_qp *qp;
        unsigned long flags;
        int ret;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
                return -EINVAL;
        }
        /* Get the ib_qp given the QPN */
        qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
        if (!qp) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
                return -EINVAL;
        }
        cm_id->device->iwcm->add_ref(qp);
        cm_id_priv->qp = qp;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        ret = cm_id->device->iwcm->accept(cm_id, iw_param);
        if (ret) {
                /* An error on accept precludes provider events */
                BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
                cm_id_priv->state = IW_CM_STATE_IDLE;
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                if (cm_id_priv->qp) {
                        cm_id->device->iwcm->rem_ref(qp);
                        cm_id_priv->qp = NULL;
                }
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
        }

        return ret;
}
EXPORT_SYMBOL(iw_cm_accept);

/*
 * Active Side: CM_ID <-- CONN_SENT
 *
 * If successful, results in the generation of a CONNECT_REPLY
 * event.  iw_cm_disconnect and iw_destroy_cm_id will block until the
 * CONNECT_REPLY event is received from the provider.
 */
int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
{
        struct iwcm_id_private *cm_id_priv;
        int ret;
        unsigned long flags;
        struct ib_qp *qp;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

        ret = alloc_work_entries(cm_id_priv, 4);
        if (ret)
                return ret;

        set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
        spin_lock_irqsave(&cm_id_priv->lock, flags);

        if (cm_id_priv->state != IW_CM_STATE_IDLE) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
                return -EINVAL;
        }

        /* Get the ib_qp given the QPN */
        qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
        if (!qp) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
                return -EINVAL;
        }
        cm_id->device->iwcm->add_ref(qp);
        cm_id_priv->qp = qp;
        cm_id_priv->state = IW_CM_STATE_CONN_SENT;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        ret = cm_id->device->iwcm->connect(cm_id, iw_param);
        if (ret) {
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                if (cm_id_priv->qp) {
                        cm_id->device->iwcm->rem_ref(qp);
                        cm_id_priv->qp = NULL;
                }
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
                cm_id_priv->state = IW_CM_STATE_IDLE;
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
        }

        return ret;
}
EXPORT_SYMBOL(iw_cm_connect);
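
/*
 * Example (illustrative sketch only; the QP, ORD/IRD values and private
 * data below are hypothetical): an active-side consumer, after filling in
 * cm_id->local_addr and cm_id->remote_addr, issues a connect.  The result
 * arrives later as an IW_CM_EVENT_CONNECT_REPLY on the same cm_id.
 *
 *      struct iw_cm_conn_param param = {
 *              .qpn              = my_qp->qp_num,
 *              .ord              = 16,
 *              .ird              = 16,
 *              .private_data     = &my_hello,
 *              .private_data_len = sizeof(my_hello),
 *      };
 *
 *      ret = iw_cm_connect(cm_id, &param);
 *
 * On failure the cm_id is returned to IDLE here; on success the handler
 * receives CONNECT_REPLY and, if event->status == 0, the cm_id moves to
 * ESTABLISHED (see cm_conn_rep_handler() below).
 */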

/*
 * Passive Side: new CM_ID <-- CONN_RECV
 *
 * Handles an inbound connect request.  The function creates a new
 * iw_cm_id to represent the new connection and inherits the client
 * callback function and other attributes from the listening parent.
 *
 * The work item contains a pointer to the listen_cm_id and the event.  The
 * listen_cm_id contains the client cm_handler, context and
 * device.  These are copied when the device is cloned.  The event
 * contains the new four tuple.
 *
 * An error on the child should not affect the parent, so this
 * function does not return a value.
 */
static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
                                struct iw_cm_event *iw_event)
{
        unsigned long flags;
        struct iw_cm_id *cm_id;
        struct iwcm_id_private *cm_id_priv;
        int ret;

        /*
         * The provider should never generate a connection request
         * event with a bad status.
         */
        BUG_ON(iw_event->status);

        cm_id = iw_create_cm_id(listen_id_priv->id.device,
                                listen_id_priv->id.cm_handler,
                                listen_id_priv->id.context);
        /* If the cm_id could not be created, ignore the request */
        if (IS_ERR(cm_id))
                goto out;

        cm_id->provider_data = iw_event->provider_data;
        cm_id->local_addr = iw_event->local_addr;
        cm_id->remote_addr = iw_event->remote_addr;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        cm_id_priv->state = IW_CM_STATE_CONN_RECV;

        /*
         * We could be destroying the listening id.  If so, ignore this
         * upcall.
         */
        spin_lock_irqsave(&listen_id_priv->lock, flags);
        if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
                spin_unlock_irqrestore(&listen_id_priv->lock, flags);
                iw_cm_reject(cm_id, NULL, 0);
                iw_destroy_cm_id(cm_id);
                goto out;
        }
        spin_unlock_irqrestore(&listen_id_priv->lock, flags);

        ret = alloc_work_entries(cm_id_priv, 3);
        if (ret) {
                iw_cm_reject(cm_id, NULL, 0);
                iw_destroy_cm_id(cm_id);
                goto out;
        }

        /* Call the client CM handler */
        ret = cm_id->cm_handler(cm_id, iw_event);
        if (ret) {
                iw_cm_reject(cm_id, NULL, 0);
                set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
                destroy_cm_id(cm_id);
                if (atomic_read(&cm_id_priv->refcount) == 0)
                        free_cm_id(cm_id_priv);
        }

out:
        if (iw_event->private_data_len)
                kfree(iw_event->private_data);
}

/*
 * Passive Side: CM_ID <-- ESTABLISHED
 *
 * The provider generated an ESTABLISHED event, which means that
 * the MPA negotiation has completed successfully and we are now in MPA
 * FPDU mode.
 *
 * This event can only be received in the CONN_RECV state.  If the
 * remote peer closed, the ESTABLISHED event would be received followed
 * by the CLOSE event.  If the app closes, it will block until we wake
 * it up after processing this event.
 */
static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
                               struct iw_cm_event *iw_event)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);

        /*
         * We clear the CONNECT_WAIT bit here to allow the callback
         * function to call iw_cm_disconnect.  Calling iw_destroy_cm_id
         * from a callback handler is not allowed.
         */
        clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
        BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
        cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
        wake_up_all(&cm_id_priv->connect_wait);

        return ret;
}

/*
 * Active Side: CM_ID <-- ESTABLISHED
 *
 * The app has called connect and is waiting for the established event in
 * order to post its requests to the server.  This event will wake up anyone
 * blocked in iw_cm_disconnect or iw_destroy_cm_id.
 */
static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
                               struct iw_cm_event *iw_event)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        /*
         * Clear the connect wait bit so a callback function calling
         * iw_cm_disconnect will not wait and deadlock this thread.
         */
        clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
        BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
        if (iw_event->status == 0) {
                cm_id_priv->id.local_addr = iw_event->local_addr;
                cm_id_priv->id.remote_addr = iw_event->remote_addr;
                cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
        } else {
                /* REJECTED or RESET */
                cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
                cm_id_priv->qp = NULL;
                cm_id_priv->state = IW_CM_STATE_IDLE;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);

        if (iw_event->private_data_len)
                kfree(iw_event->private_data);

        /* Wake up waiters on connect complete */
        wake_up_all(&cm_id_priv->connect_wait);

        return ret;
}

/*
 * CM_ID <-- CLOSING
 *
 * If in the ESTABLISHED state, move to CLOSING.
 */
static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
                                  struct iw_cm_event *iw_event)
{
        unsigned long flags;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
                cm_id_priv->state = IW_CM_STATE_CLOSING;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}

/*
 * CM_ID <-- IDLE
 *
 * If in the ESTABLISHED or CLOSING state, the QP will have been
 * moved by the provider to the ERR state.  Disassociate the CM_ID from
 * the QP, move to IDLE, and remove the 'connected' reference.
 *
 * If in some other state, the cm_id was destroyed asynchronously.
 * This is the last reference that will result in waking up
 * the app thread blocked in iw_destroy_cm_id.
 */
static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
                            struct iw_cm_event *iw_event)
{
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&cm_id_priv->lock, flags);

        if (cm_id_priv->qp) {
                cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
                cm_id_priv->qp = NULL;
        }
        switch (cm_id_priv->state) {
        case IW_CM_STATE_ESTABLISHED:
        case IW_CM_STATE_CLOSING:
                cm_id_priv->state = IW_CM_STATE_IDLE;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                break;
        case IW_CM_STATE_DESTROYING:
                break;
        default:
                BUG();
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        return ret;
}

static int process_event(struct iwcm_id_private *cm_id_priv,
                         struct iw_cm_event *iw_event)
{
        int ret = 0;

        switch (iw_event->event) {
        case IW_CM_EVENT_CONNECT_REQUEST:
                cm_conn_req_handler(cm_id_priv, iw_event);
                break;
        case IW_CM_EVENT_CONNECT_REPLY:
                ret = cm_conn_rep_handler(cm_id_priv, iw_event);
                break;
        case IW_CM_EVENT_ESTABLISHED:
                ret = cm_conn_est_handler(cm_id_priv, iw_event);
                break;
        case IW_CM_EVENT_DISCONNECT:
                cm_disconnect_handler(cm_id_priv, iw_event);
                break;
        case IW_CM_EVENT_CLOSE:
                ret = cm_close_handler(cm_id_priv, iw_event);
                break;
        default:
                BUG();
        }

        return ret;
}

/*
 * Process events on the work_list for the cm_id.  If the callback
 * function requests that the cm_id be deleted, a flag is set in the
 * cm_id flags to indicate that when the last reference is
 * removed, the cm_id is to be destroyed.  This is necessary to
 * distinguish between an object that will be destroyed by the app
 * thread asleep on the destroy_comp list vs. an object destroyed
 * here synchronously when the last reference is removed.
 */
static void cm_work_handler(struct work_struct *_work)
{
        struct iwcm_work *work = container_of(_work, struct iwcm_work, work);
        struct iw_cm_event levent;
        struct iwcm_id_private *cm_id_priv = work->cm_id;
        unsigned long flags;
        int empty;
        int ret = 0;
        int destroy_id;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        empty = list_empty(&cm_id_priv->work_list);
        while (!empty) {
                work = list_entry(cm_id_priv->work_list.next,
                                  struct iwcm_work, list);
                list_del_init(&work->list);
                empty = list_empty(&cm_id_priv->work_list);
                levent = work->event;
                put_work(work);
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);

                ret = process_event(cm_id_priv, &levent);
                if (ret) {
                        set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
                        destroy_cm_id(&cm_id_priv->id);
                }
                BUG_ON(atomic_read(&cm_id_priv->refcount) == 0);
                destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY,
                                      &cm_id_priv->flags);
                if (iwcm_deref_id(cm_id_priv)) {
                        if (destroy_id) {
                                BUG_ON(!list_empty(&cm_id_priv->work_list));
                                free_cm_id(cm_id_priv);
                        }
                        return;
                }
                if (empty)
                        return;
                spin_lock_irqsave(&cm_id_priv->lock, flags);
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}

/*
 * This function is called in interrupt context.  Schedule events on
 * the iwcm_wq thread to allow callback functions to downcall into
 * the CM and/or block.  Events are queued to a per-CM_ID
 * work_list.  If this is the first event on the work_list, the work
 * element is also queued on the iwcm_wq thread.
 *
 * Each event holds a reference on the cm_id.  Until the last posted
 * event has been delivered and processed, the cm_id cannot be
 * deleted.
 *
 * Returns:
 *            0 - the event was handled.
 *      -ENOMEM - the event was not handled due to lack of resources.
 */
static int cm_event_handler(struct iw_cm_id *cm_id,
                            struct iw_cm_event *iw_event)
{
        struct iwcm_work *work;
        struct iwcm_id_private *cm_id_priv;
        unsigned long flags;
        int ret = 0;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        work = get_work(cm_id_priv);
        if (!work) {
                ret = -ENOMEM;
                goto out;
        }

        INIT_WORK(&work->work, cm_work_handler);
        work->cm_id = cm_id_priv;
        work->event = *iw_event;

        if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
             work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
            work->event.private_data_len) {
                ret = copy_private_data(&work->event);
                if (ret) {
                        put_work(work);
                        goto out;
                }
        }

        atomic_inc(&cm_id_priv->refcount);
        if (list_empty(&cm_id_priv->work_list)) {
                list_add_tail(&work->list, &cm_id_priv->work_list);
                queue_work(iwcm_wq, &work->work);
        } else
                list_add_tail(&work->list, &cm_id_priv->work_list);
out:
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}
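
/*
 * Example (illustrative sketch only; the provider structures and field
 * names are hypothetical): a provider driver delivers an event by calling
 * the event_handler hook that iw_create_cm_id() wired to cm_event_handler().
 *
 *      struct iw_cm_event event = {
 *              .event  = IW_CM_EVENT_ESTABLISHED,
 *              .status = 0,
 *      };
 *
 *      ret = ep->cm_id->event_handler(ep->cm_id, &event);
 *
 * A return of -ENOMEM means no pre-allocated work element was available
 * (for example, the listen backlog was exceeded), and for a connection
 * request the provider is then expected to reject the connection itself.
 */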

static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
                                  struct ib_qp_attr *qp_attr,
                                  int *qp_attr_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->state) {
        case IW_CM_STATE_IDLE:
        case IW_CM_STATE_CONN_SENT:
        case IW_CM_STATE_CONN_RECV:
        case IW_CM_STATE_ESTABLISHED:
                *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
                qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE |
                                           IB_ACCESS_REMOTE_READ;
                ret = 0;
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}

static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
                                 struct ib_qp_attr *qp_attr,
                                 int *qp_attr_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->state) {
        case IW_CM_STATE_IDLE:
        case IW_CM_STATE_CONN_SENT:
        case IW_CM_STATE_CONN_RECV:
        case IW_CM_STATE_ESTABLISHED:
                *qp_attr_mask = 0;
                ret = 0;
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}

int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
                       struct ib_qp_attr *qp_attr,
                       int *qp_attr_mask)
{
        struct iwcm_id_private *cm_id_priv;
        int ret;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        switch (qp_attr->qp_state) {
        case IB_QPS_INIT:
        case IB_QPS_RTR:
                ret = iwcm_init_qp_init_attr(cm_id_priv,
                                             qp_attr, qp_attr_mask);
                break;
        case IB_QPS_RTS:
                ret = iwcm_init_qp_rts_attr(cm_id_priv,
                                            qp_attr, qp_attr_mask);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret;
}
EXPORT_SYMBOL(iw_cm_init_qp_attr);
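
/*
 * Example (illustrative sketch only; the qp pointer is hypothetical): a
 * consumer uses the mask filled in above to drive ib_modify_qp() while
 * bringing its QP up before accept or connect.
 *
 *      struct ib_qp_attr qp_attr = { .qp_state = IB_QPS_RTS };
 *      int qp_attr_mask;
 *
 *      ret = iw_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
 *      if (!ret)
 *              ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 *
 * For IB_QPS_RTS the returned mask is 0, so the modify is effectively a
 * no-op; for IB_QPS_INIT and IB_QPS_RTR it requests the QP state change
 * plus remote read/write access, as set up in iwcm_init_qp_init_attr().
 */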

static int __init iw_cm_init(void)
{
        iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
        if (!iwcm_wq)
                return -ENOMEM;

        iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
                                                 iwcm_ctl_table);
        if (!iwcm_ctl_table_hdr) {
                pr_err("iw_cm: couldn't register sysctl paths\n");
                destroy_workqueue(iwcm_wq);
                return -ENOMEM;
        }

        return 0;
}

static void __exit iw_cm_cleanup(void)
{
        unregister_net_sysctl_table(iwcm_ctl_table_hdr);
        destroy_workqueue(iwcm_wq);
}

module_init(iw_cm_init);
module_exit(iw_cm_cleanup);