This source file includes the following definitions.
- siw_device_register
- siw_device_cleanup
- siw_create_tx_threads
- siw_dev_qualified
- siw_init_cpulist
- siw_destroy_cpulist
- siw_get_tx_cpu
- siw_put_tx_cpu
- siw_get_base_qp
- siw_device_create
- siw_netdev_down
- siw_device_goes_down
- siw_netdev_event
- siw_newlink
- siw_init_module
- siw_exit_module
1
2
3
4
5
6 #include <linux/init.h>
7 #include <linux/errno.h>
8 #include <linux/netdevice.h>
9 #include <linux/inetdevice.h>
10 #include <net/net_namespace.h>
11 #include <linux/rtnetlink.h>
12 #include <linux/if_arp.h>
13 #include <linux/list.h>
14 #include <linux/kernel.h>
15 #include <linux/sched.h>
16 #include <linux/module.h>
17 #include <linux/dma-mapping.h>
18
19 #include <rdma/ib_verbs.h>
20 #include <rdma/ib_user_verbs.h>
21 #include <rdma/rdma_netlink.h>
22 #include <linux/kthread.h>
23
24 #include "siw.h"
25 #include "siw_verbs.h"
26
27 MODULE_AUTHOR("Bernard Metzler");
28 MODULE_DESCRIPTION("Software iWARP Driver");
29 MODULE_LICENSE("Dual BSD/GPL");
30
31
32 const bool zcopy_tx = true;
33
34
35
36
37
38 const bool try_gso;
39
40
41 const bool loopback_enabled = true;
42
43
44 const bool mpa_crc_required;
45
46
47 const bool mpa_crc_strict;
48
49
50 const bool siw_tcp_nagle;
51
52
53 u_char mpa_version = MPA_REVISION_2;
54
55
56
57
58 const bool peer_to_peer;
59
60 struct task_struct *siw_tx_thread[NR_CPUS];
61 struct crypto_shash *siw_crypto_shash;
62
63 static int siw_device_register(struct siw_device *sdev, const char *name)
64 {
65 struct ib_device *base_dev = &sdev->base_dev;
66 static int dev_id = 1;
67 int rv;
68
69 rv = ib_register_device(base_dev, name);
70 if (rv) {
71 pr_warn("siw: device registration error %d\n", rv);
72 return rv;
73 }
74 sdev->vendor_part_id = dev_id++;
75
76 siw_dbg(base_dev, "HWaddr=%pM\n", sdev->netdev->dev_addr);
77
78 return 0;
79 }
80
81 static void siw_device_cleanup(struct ib_device *base_dev)
82 {
83 struct siw_device *sdev = to_siw_dev(base_dev);
84
85 xa_destroy(&sdev->qp_xa);
86 xa_destroy(&sdev->mem_xa);
87 }
88
89 static int siw_create_tx_threads(void)
90 {
91 int cpu, assigned = 0;
92
93 for_each_online_cpu(cpu) {
94
95 if (cpu % cpumask_weight(topology_sibling_cpumask(cpu)))
96 continue;
97
98 siw_tx_thread[cpu] =
99 kthread_create(siw_run_sq, (unsigned long *)(long)cpu,
100 "siw_tx/%d", cpu);
101 if (IS_ERR(siw_tx_thread[cpu])) {
102 siw_tx_thread[cpu] = NULL;
103 continue;
104 }
105 kthread_bind(siw_tx_thread[cpu], cpu);
106
107 wake_up_process(siw_tx_thread[cpu]);
108 assigned++;
109 }
110 return assigned;
111 }
112
113 static int siw_dev_qualified(struct net_device *netdev)
114 {
115
116
117
118
119
120 if (netdev->type == ARPHRD_ETHER || netdev->type == ARPHRD_IEEE802 ||
121 (netdev->type == ARPHRD_LOOPBACK && loopback_enabled))
122 return 1;
123
124 return 0;
125 }
126
127 static DEFINE_PER_CPU(atomic_t, siw_use_cnt);
128
129 static struct {
130 struct cpumask **tx_valid_cpus;
131 int num_nodes;
132 } siw_cpu_info;
133
134 static int siw_init_cpulist(void)
135 {
136 int i, num_nodes = num_possible_nodes();
137
138 memset(siw_tx_thread, 0, sizeof(siw_tx_thread));
139
140 siw_cpu_info.num_nodes = num_nodes;
141
142 siw_cpu_info.tx_valid_cpus =
143 kcalloc(num_nodes, sizeof(struct cpumask *), GFP_KERNEL);
144 if (!siw_cpu_info.tx_valid_cpus) {
145 siw_cpu_info.num_nodes = 0;
146 return -ENOMEM;
147 }
148 for (i = 0; i < siw_cpu_info.num_nodes; i++) {
149 siw_cpu_info.tx_valid_cpus[i] =
150 kzalloc(sizeof(struct cpumask), GFP_KERNEL);
151 if (!siw_cpu_info.tx_valid_cpus[i])
152 goto out_err;
153
154 cpumask_clear(siw_cpu_info.tx_valid_cpus[i]);
155 }
156 for_each_possible_cpu(i)
157 cpumask_set_cpu(i, siw_cpu_info.tx_valid_cpus[cpu_to_node(i)]);
158
159 return 0;
160
161 out_err:
162 siw_cpu_info.num_nodes = 0;
163 while (--i >= 0)
164 kfree(siw_cpu_info.tx_valid_cpus[i]);
165 kfree(siw_cpu_info.tx_valid_cpus);
166 siw_cpu_info.tx_valid_cpus = NULL;
167
168 return -ENOMEM;
169 }
170
171 static void siw_destroy_cpulist(void)
172 {
173 int i = 0;
174
175 while (i < siw_cpu_info.num_nodes)
176 kfree(siw_cpu_info.tx_valid_cpus[i++]);
177
178 kfree(siw_cpu_info.tx_valid_cpus);
179 }
180
181
182
183
184
185 int siw_get_tx_cpu(struct siw_device *sdev)
186 {
187 const struct cpumask *tx_cpumask;
188 int i, num_cpus, cpu, min_use, node = sdev->numa_node, tx_cpu = -1;
189
190 if (node < 0)
191 tx_cpumask = cpu_online_mask;
192 else
193 tx_cpumask = siw_cpu_info.tx_valid_cpus[node];
194
195 num_cpus = cpumask_weight(tx_cpumask);
196 if (!num_cpus) {
197
198 tx_cpumask = cpu_online_mask;
199 num_cpus = cpumask_weight(tx_cpumask);
200 }
201 if (!num_cpus)
202 goto out;
203
204 cpu = cpumask_first(tx_cpumask);
205
206 for (i = 0, min_use = SIW_MAX_QP; i < num_cpus;
207 i++, cpu = cpumask_next(cpu, tx_cpumask)) {
208 int usage;
209
210
211 if (!siw_tx_thread[cpu])
212 continue;
213
214 usage = atomic_read(&per_cpu(siw_use_cnt, cpu));
215 if (usage <= min_use) {
216 tx_cpu = cpu;
217 min_use = usage;
218 }
219 }
220 siw_dbg(&sdev->base_dev,
221 "tx cpu %d, node %d, %d qp's\n", tx_cpu, node, min_use);
222
223 out:
224 if (tx_cpu >= 0)
225 atomic_inc(&per_cpu(siw_use_cnt, tx_cpu));
226 else
227 pr_warn("siw: no tx cpu found\n");
228
229 return tx_cpu;
230 }
231
232 void siw_put_tx_cpu(int cpu)
233 {
234 atomic_dec(&per_cpu(siw_use_cnt, cpu));
235 }
236
237 static struct ib_qp *siw_get_base_qp(struct ib_device *base_dev, int id)
238 {
239 struct siw_qp *qp = siw_qp_id2obj(to_siw_dev(base_dev), id);
240
241 if (qp) {
242
243
244
245 siw_qp_put(qp);
246 return qp->ib_qp;
247 }
248 return NULL;
249 }
250
251 static const struct ib_device_ops siw_device_ops = {
252 .owner = THIS_MODULE,
253 .uverbs_abi_ver = SIW_ABI_VERSION,
254 .driver_id = RDMA_DRIVER_SIW,
255
256 .alloc_mr = siw_alloc_mr,
257 .alloc_pd = siw_alloc_pd,
258 .alloc_ucontext = siw_alloc_ucontext,
259 .create_cq = siw_create_cq,
260 .create_qp = siw_create_qp,
261 .create_srq = siw_create_srq,
262 .dealloc_driver = siw_device_cleanup,
263 .dealloc_pd = siw_dealloc_pd,
264 .dealloc_ucontext = siw_dealloc_ucontext,
265 .dereg_mr = siw_dereg_mr,
266 .destroy_cq = siw_destroy_cq,
267 .destroy_qp = siw_destroy_qp,
268 .destroy_srq = siw_destroy_srq,
269 .get_dma_mr = siw_get_dma_mr,
270 .get_port_immutable = siw_get_port_immutable,
271 .iw_accept = siw_accept,
272 .iw_add_ref = siw_qp_get_ref,
273 .iw_connect = siw_connect,
274 .iw_create_listen = siw_create_listen,
275 .iw_destroy_listen = siw_destroy_listen,
276 .iw_get_qp = siw_get_base_qp,
277 .iw_reject = siw_reject,
278 .iw_rem_ref = siw_qp_put_ref,
279 .map_mr_sg = siw_map_mr_sg,
280 .mmap = siw_mmap,
281 .modify_qp = siw_verbs_modify_qp,
282 .modify_srq = siw_modify_srq,
283 .poll_cq = siw_poll_cq,
284 .post_recv = siw_post_receive,
285 .post_send = siw_post_send,
286 .post_srq_recv = siw_post_srq_recv,
287 .query_device = siw_query_device,
288 .query_gid = siw_query_gid,
289 .query_pkey = siw_query_pkey,
290 .query_port = siw_query_port,
291 .query_qp = siw_query_qp,
292 .query_srq = siw_query_srq,
293 .req_notify_cq = siw_req_notify_cq,
294 .reg_user_mr = siw_reg_user_mr,
295
296 INIT_RDMA_OBJ_SIZE(ib_cq, siw_cq, base_cq),
297 INIT_RDMA_OBJ_SIZE(ib_pd, siw_pd, base_pd),
298 INIT_RDMA_OBJ_SIZE(ib_srq, siw_srq, base_srq),
299 INIT_RDMA_OBJ_SIZE(ib_ucontext, siw_ucontext, base_ucontext),
300 };
301
302 static struct siw_device *siw_device_create(struct net_device *netdev)
303 {
304 struct siw_device *sdev = NULL;
305 struct ib_device *base_dev;
306 struct device *parent = netdev->dev.parent;
307 int rv;
308
309 if (!parent) {
310
311
312
313
314
315
316
317 if (netdev->type != ARPHRD_LOOPBACK) {
318 pr_warn("siw: device %s error: no parent device\n",
319 netdev->name);
320 return NULL;
321 }
322 parent = &netdev->dev;
323 }
324 sdev = ib_alloc_device(siw_device, base_dev);
325 if (!sdev)
326 return NULL;
327
328 base_dev = &sdev->base_dev;
329
330 sdev->netdev = netdev;
331
332 if (netdev->type != ARPHRD_LOOPBACK) {
333 memcpy(&base_dev->node_guid, netdev->dev_addr, 6);
334 } else {
335
336
337
338
339 size_t gidlen = min_t(size_t, strlen(base_dev->name), 6);
340
341 memcpy(&base_dev->node_guid, base_dev->name, gidlen);
342 }
343 base_dev->uverbs_cmd_mask =
344 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
345 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
346 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
347 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
348 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
349 (1ull << IB_USER_VERBS_CMD_REG_MR) |
350 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
351 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
352 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
353 (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
354 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
355 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
356 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
357 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
358 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
359 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
360 (1ull << IB_USER_VERBS_CMD_POST_SEND) |
361 (1ull << IB_USER_VERBS_CMD_POST_RECV) |
362 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
363 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV) |
364 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
365 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
366 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
367
368 base_dev->node_type = RDMA_NODE_RNIC;
369 memcpy(base_dev->node_desc, SIW_NODE_DESC_COMMON,
370 sizeof(SIW_NODE_DESC_COMMON));
371
372
373
374
375
376
377 base_dev->phys_port_cnt = 1;
378 base_dev->dev.parent = parent;
379 base_dev->dev.dma_ops = &dma_virt_ops;
380 base_dev->num_comp_vectors = num_possible_cpus();
381
382 xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1);
383 xa_init_flags(&sdev->mem_xa, XA_FLAGS_ALLOC1);
384
385 ib_set_device_ops(base_dev, &siw_device_ops);
386 rv = ib_device_set_netdev(base_dev, netdev, 1);
387 if (rv)
388 goto error;
389
390 memcpy(base_dev->iw_ifname, netdev->name,
391 sizeof(base_dev->iw_ifname));
392
393
394 base_dev->iw_driver_flags = IW_F_NO_PORT_MAP,
395
396 sdev->attrs.max_qp = SIW_MAX_QP;
397 sdev->attrs.max_qp_wr = SIW_MAX_QP_WR;
398 sdev->attrs.max_ord = SIW_MAX_ORD_QP;
399 sdev->attrs.max_ird = SIW_MAX_IRD_QP;
400 sdev->attrs.max_sge = SIW_MAX_SGE;
401 sdev->attrs.max_sge_rd = SIW_MAX_SGE_RD;
402 sdev->attrs.max_cq = SIW_MAX_CQ;
403 sdev->attrs.max_cqe = SIW_MAX_CQE;
404 sdev->attrs.max_mr = SIW_MAX_MR;
405 sdev->attrs.max_pd = SIW_MAX_PD;
406 sdev->attrs.max_mw = SIW_MAX_MW;
407 sdev->attrs.max_fmr = SIW_MAX_FMR;
408 sdev->attrs.max_srq = SIW_MAX_SRQ;
409 sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR;
410 sdev->attrs.max_srq_sge = SIW_MAX_SGE;
411
412 INIT_LIST_HEAD(&sdev->cep_list);
413 INIT_LIST_HEAD(&sdev->qp_list);
414
415 atomic_set(&sdev->num_ctx, 0);
416 atomic_set(&sdev->num_srq, 0);
417 atomic_set(&sdev->num_qp, 0);
418 atomic_set(&sdev->num_cq, 0);
419 atomic_set(&sdev->num_mr, 0);
420 atomic_set(&sdev->num_pd, 0);
421
422 sdev->numa_node = dev_to_node(parent);
423 spin_lock_init(&sdev->lock);
424
425 return sdev;
426 error:
427 ib_dealloc_device(base_dev);
428
429 return NULL;
430 }
431
432
433
434
435
436 static void siw_netdev_down(struct work_struct *work)
437 {
438 struct siw_device *sdev =
439 container_of(work, struct siw_device, netdev_down);
440
441 struct siw_qp_attrs qp_attrs;
442 struct list_head *pos, *tmp;
443
444 memset(&qp_attrs, 0, sizeof(qp_attrs));
445 qp_attrs.state = SIW_QP_STATE_ERROR;
446
447 list_for_each_safe(pos, tmp, &sdev->qp_list) {
448 struct siw_qp *qp = list_entry(pos, struct siw_qp, devq);
449
450 down_write(&qp->state_lock);
451 WARN_ON(siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE));
452 up_write(&qp->state_lock);
453 }
454 ib_device_put(&sdev->base_dev);
455 }
456
457 static void siw_device_goes_down(struct siw_device *sdev)
458 {
459 if (ib_device_try_get(&sdev->base_dev)) {
460 INIT_WORK(&sdev->netdev_down, siw_netdev_down);
461 schedule_work(&sdev->netdev_down);
462 }
463 }
464
465 static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
466 void *arg)
467 {
468 struct net_device *netdev = netdev_notifier_info_to_dev(arg);
469 struct ib_device *base_dev;
470 struct siw_device *sdev;
471
472 dev_dbg(&netdev->dev, "siw: event %lu\n", event);
473
474 if (dev_net(netdev) != &init_net)
475 return NOTIFY_OK;
476
477 base_dev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_SIW);
478 if (!base_dev)
479 return NOTIFY_OK;
480
481 sdev = to_siw_dev(base_dev);
482
483 switch (event) {
484 case NETDEV_UP:
485 sdev->state = IB_PORT_ACTIVE;
486 siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
487 break;
488
489 case NETDEV_GOING_DOWN:
490 siw_device_goes_down(sdev);
491 break;
492
493 case NETDEV_DOWN:
494 sdev->state = IB_PORT_DOWN;
495 siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
496 break;
497
498 case NETDEV_REGISTER:
499
500
501
502
503
504 siw_dbg(base_dev, "unexpected NETDEV_REGISTER event\n");
505 break;
506
507 case NETDEV_UNREGISTER:
508 ib_unregister_device_queued(&sdev->base_dev);
509 break;
510
511 case NETDEV_CHANGEADDR:
512 siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
513 break;
514
515
516
517 case NETDEV_CHANGEMTU:
518 case NETDEV_CHANGE:
519 break;
520
521 default:
522 break;
523 }
524 ib_device_put(&sdev->base_dev);
525
526 return NOTIFY_OK;
527 }
528
529 static struct notifier_block siw_netdev_nb = {
530 .notifier_call = siw_netdev_event,
531 };
532
533 static int siw_newlink(const char *basedev_name, struct net_device *netdev)
534 {
535 struct ib_device *base_dev;
536 struct siw_device *sdev = NULL;
537 int rv = -ENOMEM;
538
539 if (!siw_dev_qualified(netdev))
540 return -EINVAL;
541
542 base_dev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_SIW);
543 if (base_dev) {
544 ib_device_put(base_dev);
545 return -EEXIST;
546 }
547 sdev = siw_device_create(netdev);
548 if (sdev) {
549 dev_dbg(&netdev->dev, "siw: new device\n");
550
551 if (netif_running(netdev) && netif_carrier_ok(netdev))
552 sdev->state = IB_PORT_ACTIVE;
553 else
554 sdev->state = IB_PORT_DOWN;
555
556 rv = siw_device_register(sdev, basedev_name);
557 if (rv)
558 ib_dealloc_device(&sdev->base_dev);
559 }
560 return rv;
561 }
562
563 static struct rdma_link_ops siw_link_ops = {
564 .type = "siw",
565 .newlink = siw_newlink,
566 };
567
568
569
570
571
572 static __init int siw_init_module(void)
573 {
574 int rv;
575 int nr_cpu;
576
577 if (SENDPAGE_THRESH < SIW_MAX_INLINE) {
578 pr_info("siw: sendpage threshold too small: %u\n",
579 (int)SENDPAGE_THRESH);
580 rv = -EINVAL;
581 goto out_error;
582 }
583 rv = siw_init_cpulist();
584 if (rv)
585 goto out_error;
586
587 rv = siw_cm_init();
588 if (rv)
589 goto out_error;
590
591 if (!siw_create_tx_threads()) {
592 pr_info("siw: Could not start any TX thread\n");
593 rv = -ENOMEM;
594 goto out_error;
595 }
596
597
598
599
600 siw_crypto_shash = crypto_alloc_shash("crc32c", 0, 0);
601 if (IS_ERR(siw_crypto_shash)) {
602 pr_info("siw: Loading CRC32c failed: %ld\n",
603 PTR_ERR(siw_crypto_shash));
604 siw_crypto_shash = NULL;
605 if (mpa_crc_required) {
606 rv = -EOPNOTSUPP;
607 goto out_error;
608 }
609 }
610 rv = register_netdevice_notifier(&siw_netdev_nb);
611 if (rv)
612 goto out_error;
613
614 rdma_link_register(&siw_link_ops);
615
616 pr_info("SoftiWARP attached\n");
617 return 0;
618
619 out_error:
620 for (nr_cpu = 0; nr_cpu < nr_cpu_ids; nr_cpu++) {
621 if (siw_tx_thread[nr_cpu]) {
622 siw_stop_tx_thread(nr_cpu);
623 siw_tx_thread[nr_cpu] = NULL;
624 }
625 }
626 if (siw_crypto_shash)
627 crypto_free_shash(siw_crypto_shash);
628
629 pr_info("SoftIWARP attach failed. Error: %d\n", rv);
630
631 siw_cm_exit();
632 siw_destroy_cpulist();
633
634 return rv;
635 }
636
637 static void __exit siw_exit_module(void)
638 {
639 int cpu;
640
641 for_each_possible_cpu(cpu) {
642 if (siw_tx_thread[cpu]) {
643 siw_stop_tx_thread(cpu);
644 siw_tx_thread[cpu] = NULL;
645 }
646 }
647 unregister_netdevice_notifier(&siw_netdev_nb);
648 rdma_link_unregister(&siw_link_ops);
649 ib_unregister_driver(RDMA_DRIVER_SIW);
650
651 siw_cm_exit();
652
653 siw_destroy_cpulist();
654
655 if (siw_crypto_shash)
656 crypto_free_shash(siw_crypto_shash);
657
658 pr_info("SoftiWARP detached\n");
659 }
660
661 module_init(siw_init_module);
662 module_exit(siw_exit_module);
663
664 MODULE_ALIAS_RDMA_LINK("siw");