#include <linux/module.h>

#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>

struct nullb_cmd {
	struct list_head list;
	struct llist_node ll_list;
	struct call_single_data csd;
	struct request *rq;
	struct bio *bio;
	unsigned int tag;
	struct nullb_queue *nq;
};

struct nullb_queue {
	unsigned long *tag_map;
	wait_queue_head_t wait;
	unsigned int queue_depth;

	struct nullb_cmd *cmds;
};

struct nullb {
	struct list_head list;
	unsigned int index;
	struct request_queue *q;
	struct gendisk *disk;
	struct blk_mq_tag_set tag_set;
	struct hrtimer timer;
	unsigned int queue_depth;
	spinlock_t lock;

	struct nullb_queue *queues;
	unsigned int nr_queues;
};

static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static int nullb_indexes;

struct completion_queue {
	struct llist_head list;
	struct hrtimer timer;
};

/*
 * These are per-cpu for now, they will need to be configured by the
 * complete_queues parameter and appropriately mapped.
 */
static DEFINE_PER_CPU(struct completion_queue, completion_queues);

enum {
	NULL_IRQ_NONE		= 0,
	NULL_IRQ_SOFTIRQ	= 1,
	NULL_IRQ_TIMER		= 2,
};

enum {
	NULL_Q_BIO		= 0,
	NULL_Q_RQ		= 1,
	NULL_Q_MQ		= 2,
};

static int submit_queues;
module_param(submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");

static int home_node = NUMA_NO_NODE;
module_param(home_node, int, S_IRUGO);
MODULE_PARM_DESC(home_node, "Home node for the device");

static int queue_mode = NULL_Q_MQ;

static int null_param_store_val(const char *str, int *val, int min, int max)
{
	int ret, new_val;

	ret = kstrtoint(str, 10, &new_val);
	if (ret)
		return -EINVAL;

	if (new_val < min || new_val > max)
		return -EINVAL;

	*val = new_val;
	return 0;
}

static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
{
	return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ);
}

static struct kernel_param_ops null_queue_mode_param_ops = {
	.set	= null_set_queue_mode,
	.get	= param_get_int,
};

device_param_cb(queue_mode, &null_queue_mode_param_ops, &queue_mode, S_IRUGO);
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");

static int gb = 250;
module_param(gb, int, S_IRUGO);
MODULE_PARM_DESC(gb, "Size in GB");

static int bs = 512;
module_param(bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");

static int nr_devices = 2;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");

static int irqmode = NULL_IRQ_SOFTIRQ;

static int null_set_irqmode(const char *str, const struct kernel_param *kp)
{
	return null_param_store_val(str, &irqmode, NULL_IRQ_NONE,
					NULL_IRQ_TIMER);
}

static struct kernel_param_ops null_irqmode_param_ops = {
	.set	= null_set_irqmode,
	.get	= param_get_int,
};

device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");

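/* Completion delay in nanoseconds; only consulted when irqmode == NULL_IRQ_TIMER. */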
0-none, 1-softirq, 2-timer"); 137 138static int completion_nsec = 10000; 139module_param(completion_nsec, int, S_IRUGO); 140MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns"); 141 142static int hw_queue_depth = 64; 143module_param(hw_queue_depth, int, S_IRUGO); 144MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64"); 145 146static bool use_per_node_hctx = false; 147module_param(use_per_node_hctx, bool, S_IRUGO); 148MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false"); 149 150static void put_tag(struct nullb_queue *nq, unsigned int tag) 151{ 152 clear_bit_unlock(tag, nq->tag_map); 153 154 if (waitqueue_active(&nq->wait)) 155 wake_up(&nq->wait); 156} 157 158static unsigned int get_tag(struct nullb_queue *nq) 159{ 160 unsigned int tag; 161 162 do { 163 tag = find_first_zero_bit(nq->tag_map, nq->queue_depth); 164 if (tag >= nq->queue_depth) 165 return -1U; 166 } while (test_and_set_bit_lock(tag, nq->tag_map)); 167 168 return tag; 169} 170 171static void free_cmd(struct nullb_cmd *cmd) 172{ 173 put_tag(cmd->nq, cmd->tag); 174} 175 176static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) 177{ 178 struct nullb_cmd *cmd; 179 unsigned int tag; 180 181 tag = get_tag(nq); 182 if (tag != -1U) { 183 cmd = &nq->cmds[tag]; 184 cmd->tag = tag; 185 cmd->nq = nq; 186 return cmd; 187 } 188 189 return NULL; 190} 191 192static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) 193{ 194 struct nullb_cmd *cmd; 195 DEFINE_WAIT(wait); 196 197 cmd = __alloc_cmd(nq); 198 if (cmd || !can_wait) 199 return cmd; 200 201 do { 202 prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); 203 cmd = __alloc_cmd(nq); 204 if (cmd) 205 break; 206 207 io_schedule(); 208 } while (1); 209 210 finish_wait(&nq->wait, &wait); 211 return cmd; 212} 213 214static void end_cmd(struct nullb_cmd *cmd) 215{ 216 switch (queue_mode) { 217 case NULL_Q_MQ: 218 blk_mq_end_request(cmd->rq, 0); 219 return; 220 case NULL_Q_RQ: 221 INIT_LIST_HEAD(&cmd->rq->queuelist); 222 blk_end_request_all(cmd->rq, 0); 223 break; 224 case NULL_Q_BIO: 225 bio_endio(cmd->bio, 0); 226 break; 227 } 228 229 free_cmd(cmd); 230} 231 232static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) 233{ 234 struct completion_queue *cq; 235 struct llist_node *entry; 236 struct nullb_cmd *cmd; 237 238 cq = &per_cpu(completion_queues, smp_processor_id()); 239 240 while ((entry = llist_del_all(&cq->list)) != NULL) { 241 entry = llist_reverse_order(entry); 242 do { 243 cmd = container_of(entry, struct nullb_cmd, ll_list); 244 entry = entry->next; 245 end_cmd(cmd); 246 } while (entry); 247 } 248 249 return HRTIMER_NORESTART; 250} 251 252static void null_cmd_end_timer(struct nullb_cmd *cmd) 253{ 254 struct completion_queue *cq = &per_cpu(completion_queues, get_cpu()); 255 256 cmd->ll_list.next = NULL; 257 if (llist_add(&cmd->ll_list, &cq->list)) { 258 ktime_t kt = ktime_set(0, completion_nsec); 259 260 hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL); 261 } 262 263 put_cpu(); 264} 265 266static void null_softirq_done_fn(struct request *rq) 267{ 268 if (queue_mode == NULL_Q_MQ) 269 end_cmd(blk_mq_rq_to_pdu(rq)); 270 else 271 end_cmd(rq->special); 272} 273 274static inline void null_handle_cmd(struct nullb_cmd *cmd) 275{ 276 /* Complete IO by inline, softirq or timer */ 277 switch (irqmode) { 278 case NULL_IRQ_SOFTIRQ: 279 switch (queue_mode) { 280 case NULL_Q_MQ: 281 blk_mq_complete_request(cmd->rq); 282 break; 283 
		case NULL_Q_RQ:
			blk_complete_request(cmd->rq);
			break;
		case NULL_Q_BIO:
			/*
			 * XXX: no proper submitting cpu information available.
			 */
			end_cmd(cmd);
			break;
		}
		break;
	case NULL_IRQ_NONE:
		end_cmd(cmd);
		break;
	case NULL_IRQ_TIMER:
		null_cmd_end_timer(cmd);
		break;
	}
}

static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
{
	int index = 0;

	if (nullb->nr_queues != 1)
		index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);

	return &nullb->queues[index];
}

static void null_queue_bio(struct request_queue *q, struct bio *bio)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 1);
	cmd->bio = bio;

	null_handle_cmd(cmd);
}

static int null_rq_prep_fn(struct request_queue *q, struct request *req)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 0);
	if (cmd) {
		cmd->rq = req;
		req->special = cmd;
		return BLKPREP_OK;
	}

	return BLKPREP_DEFER;
}

static void null_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_fetch_request(q)) != NULL) {
		struct nullb_cmd *cmd = rq->special;

		spin_unlock_irq(q->queue_lock);
		null_handle_cmd(cmd);
		spin_lock_irq(q->queue_lock);
	}
}

static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
			 const struct blk_mq_queue_data *bd)
{
	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);

	cmd->rq = bd->rq;
	cmd->nq = hctx->driver_data;

	blk_mq_start_request(bd->rq);

	null_handle_cmd(cmd);
	return BLK_MQ_RQ_QUEUE_OK;
}

static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
	BUG_ON(!nullb);
	BUG_ON(!nq);

	init_waitqueue_head(&nq->wait);
	nq->queue_depth = nullb->queue_depth;
}

static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			  unsigned int index)
{
	struct nullb *nullb = data;
	struct nullb_queue *nq = &nullb->queues[index];

	hctx->driver_data = nq;
	null_init_queue(nullb, nq);
	nullb->nr_queues++;

	return 0;
}

static struct blk_mq_ops null_mq_ops = {
	.queue_rq	= null_queue_rq,
	.map_queue	= blk_mq_map_queue,
	.init_hctx	= null_init_hctx,
	.complete	= null_softirq_done_fn,
};

static void null_del_dev(struct nullb *nullb)
{
	list_del_init(&nullb->list);

	del_gendisk(nullb->disk);
	blk_cleanup_queue(nullb->q);
	if (queue_mode == NULL_Q_MQ)
		blk_mq_free_tag_set(&nullb->tag_set);
	put_disk(nullb->disk);
	kfree(nullb);
}

static int null_open(struct block_device *bdev, fmode_t mode)
{
	return 0;
}

static void null_release(struct gendisk *disk, fmode_t mode)
{
}

static const struct block_device_operations null_fops = {
	.owner =	THIS_MODULE,
	.open =		null_open,
	.release =	null_release,
};

static int setup_commands(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	int i, tag_size;

	nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
	if (!nq->cmds)
		return -ENOMEM;

	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
	nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
	if (!nq->tag_map) {
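		/* Tag bitmap allocation failed; release the command array allocated above. */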
		kfree(nq->cmds);
		return -ENOMEM;
	}

	for (i = 0; i < nq->queue_depth; i++) {
		cmd = &nq->cmds[i];
		INIT_LIST_HEAD(&cmd->list);
		cmd->ll_list.next = NULL;
		cmd->tag = -1U;
	}

	return 0;
}

static void cleanup_queue(struct nullb_queue *nq)
{
	kfree(nq->tag_map);
	kfree(nq->cmds);
}

static void cleanup_queues(struct nullb *nullb)
{
	int i;

	for (i = 0; i < nullb->nr_queues; i++)
		cleanup_queue(&nullb->queues[i]);

	kfree(nullb->queues);
}

static int setup_queues(struct nullb *nullb)
{
	nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
				GFP_KERNEL);
	if (!nullb->queues)
		return -ENOMEM;

	nullb->nr_queues = 0;
	nullb->queue_depth = hw_queue_depth;

	return 0;
}

static int init_driver_queues(struct nullb *nullb)
{
	struct nullb_queue *nq;
	int i, ret = 0;

	for (i = 0; i < submit_queues; i++) {
		nq = &nullb->queues[i];

		null_init_queue(nullb, nq);

		ret = setup_commands(nq);
		if (ret)
			return ret;
		nullb->nr_queues++;
	}
	return 0;
}

static int null_add_dev(void)
{
	struct gendisk *disk;
	struct nullb *nullb;
	sector_t size;
	int rv;

	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
	if (!nullb) {
		rv = -ENOMEM;
		goto out;
	}

	spin_lock_init(&nullb->lock);

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
		submit_queues = nr_online_nodes;

	rv = setup_queues(nullb);
	if (rv)
		goto out_free_nullb;

	if (queue_mode == NULL_Q_MQ) {
		nullb->tag_set.ops = &null_mq_ops;
		nullb->tag_set.nr_hw_queues = submit_queues;
		nullb->tag_set.queue_depth = hw_queue_depth;
		nullb->tag_set.numa_node = home_node;
		nullb->tag_set.cmd_size = sizeof(struct nullb_cmd);
		nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
		nullb->tag_set.driver_data = nullb;

		rv = blk_mq_alloc_tag_set(&nullb->tag_set);
		if (rv)
			goto out_cleanup_queues;

		nullb->q = blk_mq_init_queue(&nullb->tag_set);
		if (IS_ERR(nullb->q)) {
			rv = -ENOMEM;
			goto out_cleanup_tags;
		}
	} else if (queue_mode == NULL_Q_BIO) {
		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
		if (!nullb->q) {
			rv = -ENOMEM;
			goto out_cleanup_queues;
		}
		blk_queue_make_request(nullb->q, null_queue_bio);
		rv = init_driver_queues(nullb);
		if (rv)
			goto out_cleanup_blk_queue;
	} else {
		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
		if (!nullb->q) {
			rv = -ENOMEM;
			goto out_cleanup_queues;
		}
		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
		blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
		rv = init_driver_queues(nullb);
		if (rv)
			goto out_cleanup_blk_queue;
	}

	nullb->q->queuedata = nullb;
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);

	disk = nullb->disk = alloc_disk_node(1, home_node);
	if (!disk) {
		rv = -ENOMEM;
		goto out_cleanup_blk_queue;
	}

	mutex_lock(&lock);
	list_add_tail(&nullb->list, &nullb_list);
	nullb->index = nullb_indexes++;
	mutex_unlock(&lock);

	blk_queue_logical_block_size(nullb->q, bs);
	blk_queue_physical_block_size(nullb->q, bs);

	size = gb * 1024 * 1024 * 1024ULL;
	sector_div(size, bs);
	set_capacity(disk, size);

	disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
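	/* Describe the gendisk, attach it to our queue and register it as nullb<index>. */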
	disk->major		= null_major;
	disk->first_minor	= nullb->index;
	disk->fops		= &null_fops;
	disk->private_data	= nullb;
	disk->queue		= nullb->q;
	sprintf(disk->disk_name, "nullb%d", nullb->index);
	add_disk(disk);
	return 0;

out_cleanup_blk_queue:
	blk_cleanup_queue(nullb->q);
out_cleanup_tags:
	if (queue_mode == NULL_Q_MQ)
		blk_mq_free_tag_set(&nullb->tag_set);
out_cleanup_queues:
	cleanup_queues(nullb);
out_free_nullb:
	kfree(nullb);
out:
	return rv;
}

static int __init null_init(void)
{
	unsigned int i;

	if (bs > PAGE_SIZE) {
		pr_warn("null_blk: invalid block size\n");
		pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
		bs = PAGE_SIZE;
	}

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
		if (submit_queues < nr_online_nodes) {
			pr_warn("null_blk: submit_queues param is set to %u.",
							nr_online_nodes);
			submit_queues = nr_online_nodes;
		}
	} else if (submit_queues > nr_cpu_ids)
		submit_queues = nr_cpu_ids;
	else if (!submit_queues)
		submit_queues = 1;

	mutex_init(&lock);

	/* Initialize a separate list for each CPU for issuing softirqs */
	for_each_possible_cpu(i) {
		struct completion_queue *cq = &per_cpu(completion_queues, i);

		init_llist_head(&cq->list);

		if (irqmode != NULL_IRQ_TIMER)
			continue;

		hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		cq->timer.function = null_cmd_timer_expired;
	}

	null_major = register_blkdev(0, "nullb");
	if (null_major < 0)
		return null_major;

	for (i = 0; i < nr_devices; i++) {
		if (null_add_dev()) {
			unregister_blkdev(null_major, "nullb");
			return -EINVAL;
		}
	}

	pr_info("null: module loaded\n");
	return 0;
}

static void __exit null_exit(void)
{
	struct nullb *nullb;

	unregister_blkdev(null_major, "nullb");

	mutex_lock(&lock);
	while (!list_empty(&nullb_list)) {
		nullb = list_entry(nullb_list.next, struct nullb, list);
		null_del_dev(nullb);
	}
	mutex_unlock(&lock);
}

module_init(null_init);
module_exit(null_exit);

MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
MODULE_LICENSE("GPL");