1/* 2 * Copyright (C) 2013 Shaohua Li <shli@kernel.org> 3 * Copyright (C) 2014 Red Hat, Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms and conditions of the GNU General Public License, 7 * version 2, as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 * more details. 13 * 14 * You should have received a copy of the GNU General Public License along with 15 * this program; if not, write to the Free Software Foundation, Inc., 16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 17 */ 18 19#include <linux/spinlock.h> 20#include <linux/module.h> 21#include <linux/idr.h> 22#include <linux/timer.h> 23#include <linux/parser.h> 24#include <scsi/scsi.h> 25#include <scsi/scsi_host.h> 26#include <linux/uio_driver.h> 27#include <net/genetlink.h> 28#include <target/target_core_base.h> 29#include <target/target_core_fabric.h> 30#include <target/target_core_backend.h> 31#include <target/target_core_backend_configfs.h> 32 33#include <linux/target_core_user.h> 34 35/* 36 * Define a shared-memory interface for LIO to pass SCSI commands and 37 * data to userspace for processing. This is to allow backends that 38 * are too complex for in-kernel support to be possible. 39 * 40 * It uses the UIO framework to do a lot of the device-creation and 41 * introspection work for us. 42 * 43 * See the .h file for how the ring is laid out. Note that while the 44 * command ring is defined, the particulars of the data area are 45 * not. Offset values in the command entry point to other locations 46 * internal to the mmap()ed area. There is separate space outside the 47 * command ring for data buffers. This leaves maximum flexibility for 48 * moving buffer allocations, or even page flipping or other 49 * allocation techniques, without altering the command ring layout. 50 * 51 * SECURITY: 52 * The user process must be assumed to be malicious. There's no way to 53 * prevent it breaking the command ring protocol if it wants, but in 54 * order to prevent other issues we must only ever read *data* from 55 * the shared memory area, not offsets or sizes. This applies to 56 * command ring entries as well as the mailbox. Extra code needed for 57 * this may have a 'UAM' comment. 58 */ 59 60 61#define TCMU_TIME_OUT (30 * MSEC_PER_SEC) 62 63#define CMDR_SIZE (16 * 4096) 64#define DATA_SIZE (257 * 4096) 65 66#define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE) 67 68static struct device *tcmu_root_device; 69 70struct tcmu_hba { 71 u32 host_id; 72}; 73 74#define TCMU_CONFIG_LEN 256 75 76struct tcmu_dev { 77 struct se_device se_dev; 78 79 char *name; 80 struct se_hba *hba; 81 82#define TCMU_DEV_BIT_OPEN 0 83#define TCMU_DEV_BIT_BROKEN 1 84 unsigned long flags; 85 86 struct uio_info uio_info; 87 88 struct tcmu_mailbox *mb_addr; 89 size_t dev_size; 90 u32 cmdr_size; 91 u32 cmdr_last_cleaned; 92 /* Offset of data ring from start of mb */ 93 size_t data_off; 94 size_t data_size; 95 /* Ring head + tail values. */ 96 /* Must add data_off and mb_addr to get the address */ 97 size_t data_head; 98 size_t data_tail; 99 100 wait_queue_head_t wait_cmdr; 101 /* TODO should this be a mutex? */ 102 spinlock_t cmdr_lock; 103 104 struct idr commands; 105 spinlock_t commands_lock; 106 107 struct timer_list timeout; 108 109 char dev_config[TCMU_CONFIG_LEN]; 110}; 111 112#define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev) 113 114#define CMDR_OFF sizeof(struct tcmu_mailbox) 115 116struct tcmu_cmd { 117 struct se_cmd *se_cmd; 118 struct tcmu_dev *tcmu_dev; 119 120 uint16_t cmd_id; 121 122 /* Can't use se_cmd->data_length when cleaning up expired cmds, because if 123 cmd has been completed then accessing se_cmd is off limits */ 124 size_t data_length; 125 126 unsigned long deadline; 127 128#define TCMU_CMD_BIT_EXPIRED 0 129 unsigned long flags; 130}; 131 132static struct kmem_cache *tcmu_cmd_cache; 133 134/* multicast group */ 135enum tcmu_multicast_groups { 136 TCMU_MCGRP_CONFIG, 137}; 138 139static const struct genl_multicast_group tcmu_mcgrps[] = { 140 [TCMU_MCGRP_CONFIG] = { .name = "config", }, 141}; 142 143/* Our generic netlink family */ 144static struct genl_family tcmu_genl_family = { 145 .id = GENL_ID_GENERATE, 146 .hdrsize = 0, 147 .name = "TCM-USER", 148 .version = 1, 149 .maxattr = TCMU_ATTR_MAX, 150 .mcgrps = tcmu_mcgrps, 151 .n_mcgrps = ARRAY_SIZE(tcmu_mcgrps), 152}; 153 154static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) 155{ 156 struct se_device *se_dev = se_cmd->se_dev; 157 struct tcmu_dev *udev = TCMU_DEV(se_dev); 158 struct tcmu_cmd *tcmu_cmd; 159 int cmd_id; 160 161 tcmu_cmd = kmem_cache_zalloc(tcmu_cmd_cache, GFP_KERNEL); 162 if (!tcmu_cmd) 163 return NULL; 164 165 tcmu_cmd->se_cmd = se_cmd; 166 tcmu_cmd->tcmu_dev = udev; 167 tcmu_cmd->data_length = se_cmd->data_length; 168 169 tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT); 170 171 idr_preload(GFP_KERNEL); 172 spin_lock_irq(&udev->commands_lock); 173 cmd_id = idr_alloc(&udev->commands, tcmu_cmd, 0, 174 USHRT_MAX, GFP_NOWAIT); 175 spin_unlock_irq(&udev->commands_lock); 176 idr_preload_end(); 177 178 if (cmd_id < 0) { 179 kmem_cache_free(tcmu_cmd_cache, tcmu_cmd); 180 return NULL; 181 } 182 tcmu_cmd->cmd_id = cmd_id; 183 184 return tcmu_cmd; 185} 186 187static inline void tcmu_flush_dcache_range(void *vaddr, size_t size) 188{ 189 unsigned long offset = (unsigned long) vaddr & ~PAGE_MASK; 190 191 size = round_up(size+offset, PAGE_SIZE); 192 vaddr -= offset; 193 194 while (size) { 195 flush_dcache_page(virt_to_page(vaddr)); 196 size -= PAGE_SIZE; 197 } 198} 199 200/* 201 * Some ring helper functions. We don't assume size is a power of 2 so 202 * we can't use circ_buf.h. 203 */ 204static inline size_t spc_used(size_t head, size_t tail, size_t size) 205{ 206 int diff = head - tail; 207 208 if (diff >= 0) 209 return diff; 210 else 211 return size + diff; 212} 213 214static inline size_t spc_free(size_t head, size_t tail, size_t size) 215{ 216 /* Keep 1 byte unused or we can't tell full from empty */ 217 return (size - spc_used(head, tail, size) - 1); 218} 219 220static inline size_t head_to_end(size_t head, size_t size) 221{ 222 return size - head; 223} 224 225#define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size) 226 227/* 228 * We can't queue a command until we have space available on the cmd ring *and* space 229 * space avail on the data ring. 230 * 231 * Called with ring lock held. 232 */ 233static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t data_needed) 234{ 235 struct tcmu_mailbox *mb = udev->mb_addr; 236 size_t space; 237 u32 cmd_head; 238 size_t cmd_needed; 239 240 tcmu_flush_dcache_range(mb, sizeof(*mb)); 241 242 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */ 243 244 /* 245 * If cmd end-of-ring space is too small then we need space for a NOP plus 246 * original cmd - cmds are internally contiguous. 247 */ 248 if (head_to_end(cmd_head, udev->cmdr_size) >= cmd_size) 249 cmd_needed = cmd_size; 250 else 251 cmd_needed = cmd_size + head_to_end(cmd_head, udev->cmdr_size); 252 253 space = spc_free(cmd_head, udev->cmdr_last_cleaned, udev->cmdr_size); 254 if (space < cmd_needed) { 255 pr_debug("no cmd space: %u %u %u\n", cmd_head, 256 udev->cmdr_last_cleaned, udev->cmdr_size); 257 return false; 258 } 259 260 space = spc_free(udev->data_head, udev->data_tail, udev->data_size); 261 if (space < data_needed) { 262 pr_debug("no data space: %zu %zu %zu\n", udev->data_head, 263 udev->data_tail, udev->data_size); 264 return false; 265 } 266 267 return true; 268} 269 270static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) 271{ 272 struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; 273 struct se_cmd *se_cmd = tcmu_cmd->se_cmd; 274 size_t base_command_size, command_size; 275 struct tcmu_mailbox *mb; 276 struct tcmu_cmd_entry *entry; 277 int i; 278 struct scatterlist *sg; 279 struct iovec *iov; 280 int iov_cnt = 0; 281 uint32_t cmd_head; 282 uint64_t cdb_off; 283 284 if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) 285 return -EINVAL; 286 287 /* 288 * Must be a certain minimum size for response sense info, but 289 * also may be larger if the iov array is large. 290 * 291 * iovs = sgl_nents+1, for end-of-ring case, plus another 1 292 * b/c size == offsetof one-past-element. 293 */ 294 base_command_size = max(offsetof(struct tcmu_cmd_entry, 295 req.iov[se_cmd->t_data_nents + 2]), 296 sizeof(struct tcmu_cmd_entry)); 297 command_size = base_command_size 298 + round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE); 299 300 WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1)); 301 302 spin_lock_irq(&udev->cmdr_lock); 303 304 mb = udev->mb_addr; 305 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */ 306 if ((command_size > (udev->cmdr_size / 2)) 307 || tcmu_cmd->data_length > (udev->data_size - 1)) 308 pr_warn("TCMU: Request of size %zu/%zu may be too big for %u/%zu " 309 "cmd/data ring buffers\n", command_size, tcmu_cmd->data_length, 310 udev->cmdr_size, udev->data_size); 311 312 while (!is_ring_space_avail(udev, command_size, tcmu_cmd->data_length)) { 313 int ret; 314 DEFINE_WAIT(__wait); 315 316 prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE); 317 318 pr_debug("sleeping for ring space\n"); 319 spin_unlock_irq(&udev->cmdr_lock); 320 ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT)); 321 finish_wait(&udev->wait_cmdr, &__wait); 322 if (!ret) { 323 pr_warn("tcmu: command timed out\n"); 324 return -ETIMEDOUT; 325 } 326 327 spin_lock_irq(&udev->cmdr_lock); 328 329 /* We dropped cmdr_lock, cmd_head is stale */ 330 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */ 331 } 332 333 /* Insert a PAD if end-of-ring space is too small */ 334 if (head_to_end(cmd_head, udev->cmdr_size) < command_size) { 335 size_t pad_size = head_to_end(cmd_head, udev->cmdr_size); 336 337 entry = (void *) mb + CMDR_OFF + cmd_head; 338 tcmu_flush_dcache_range(entry, sizeof(*entry)); 339 tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_PAD); 340 tcmu_hdr_set_len(&entry->hdr.len_op, pad_size); 341 entry->hdr.cmd_id = 0; /* not used for PAD */ 342 entry->hdr.kflags = 0; 343 entry->hdr.uflags = 0; 344 345 UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size); 346 347 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */ 348 WARN_ON(cmd_head != 0); 349 } 350 351 entry = (void *) mb + CMDR_OFF + cmd_head; 352 tcmu_flush_dcache_range(entry, sizeof(*entry)); 353 tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD); 354 tcmu_hdr_set_len(&entry->hdr.len_op, command_size); 355 entry->hdr.cmd_id = tcmu_cmd->cmd_id; 356 entry->hdr.kflags = 0; 357 entry->hdr.uflags = 0; 358 359 /* 360 * Fix up iovecs, and handle if allocation in data ring wrapped. 361 */ 362 iov = &entry->req.iov[0]; 363 for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) { 364 size_t copy_bytes = min((size_t)sg->length, 365 head_to_end(udev->data_head, udev->data_size)); 366 void *from = kmap_atomic(sg_page(sg)) + sg->offset; 367 void *to = (void *) mb + udev->data_off + udev->data_head; 368 369 if (tcmu_cmd->se_cmd->data_direction == DMA_TO_DEVICE) { 370 memcpy(to, from, copy_bytes); 371 tcmu_flush_dcache_range(to, copy_bytes); 372 } 373 374 /* Even iov_base is relative to mb_addr */ 375 iov->iov_len = copy_bytes; 376 iov->iov_base = (void __user *) udev->data_off + 377 udev->data_head; 378 iov_cnt++; 379 iov++; 380 381 UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size); 382 383 /* Uh oh, we wrapped the buffer. Must split sg across 2 iovs. */ 384 if (sg->length != copy_bytes) { 385 from += copy_bytes; 386 copy_bytes = sg->length - copy_bytes; 387 388 iov->iov_len = copy_bytes; 389 iov->iov_base = (void __user *) udev->data_off + 390 udev->data_head; 391 392 if (se_cmd->data_direction == DMA_TO_DEVICE) { 393 to = (void *) mb + udev->data_off + udev->data_head; 394 memcpy(to, from, copy_bytes); 395 tcmu_flush_dcache_range(to, copy_bytes); 396 } 397 398 iov_cnt++; 399 iov++; 400 401 UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size); 402 } 403 404 kunmap_atomic(from); 405 } 406 entry->req.iov_cnt = iov_cnt; 407 entry->req.iov_bidi_cnt = 0; 408 entry->req.iov_dif_cnt = 0; 409 410 /* All offsets relative to mb_addr, not start of entry! */ 411 cdb_off = CMDR_OFF + cmd_head + base_command_size; 412 memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb)); 413 entry->req.cdb_off = cdb_off; 414 tcmu_flush_dcache_range(entry, sizeof(*entry)); 415 416 UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size); 417 tcmu_flush_dcache_range(mb, sizeof(*mb)); 418 419 spin_unlock_irq(&udev->cmdr_lock); 420 421 /* TODO: only if FLUSH and FUA? */ 422 uio_event_notify(&udev->uio_info); 423 424 mod_timer(&udev->timeout, 425 round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT))); 426 427 return 0; 428} 429 430static int tcmu_queue_cmd(struct se_cmd *se_cmd) 431{ 432 struct se_device *se_dev = se_cmd->se_dev; 433 struct tcmu_dev *udev = TCMU_DEV(se_dev); 434 struct tcmu_cmd *tcmu_cmd; 435 int ret; 436 437 tcmu_cmd = tcmu_alloc_cmd(se_cmd); 438 if (!tcmu_cmd) 439 return -ENOMEM; 440 441 ret = tcmu_queue_cmd_ring(tcmu_cmd); 442 if (ret < 0) { 443 pr_err("TCMU: Could not queue command\n"); 444 spin_lock_irq(&udev->commands_lock); 445 idr_remove(&udev->commands, tcmu_cmd->cmd_id); 446 spin_unlock_irq(&udev->commands_lock); 447 448 kmem_cache_free(tcmu_cmd_cache, tcmu_cmd); 449 } 450 451 return ret; 452} 453 454static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *entry) 455{ 456 struct se_cmd *se_cmd = cmd->se_cmd; 457 struct tcmu_dev *udev = cmd->tcmu_dev; 458 459 if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) { 460 /* cmd has been completed already from timeout, just reclaim data 461 ring space */ 462 UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size); 463 return; 464 } 465 466 if (entry->hdr.uflags & TCMU_UFLAG_UNKNOWN_OP) { 467 UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size); 468 pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n", 469 cmd->se_cmd); 470 transport_generic_request_failure(cmd->se_cmd, 471 TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE); 472 cmd->se_cmd = NULL; 473 kmem_cache_free(tcmu_cmd_cache, cmd); 474 return; 475 } 476 477 if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) { 478 memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer, 479 se_cmd->scsi_sense_length); 480 481 UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size); 482 } 483 else if (se_cmd->data_direction == DMA_FROM_DEVICE) { 484 struct scatterlist *sg; 485 int i; 486 487 /* It'd be easier to look at entry's iovec again, but UAM */ 488 for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) { 489 size_t copy_bytes; 490 void *to; 491 void *from; 492 493 copy_bytes = min((size_t)sg->length, 494 head_to_end(udev->data_tail, udev->data_size)); 495 496 to = kmap_atomic(sg_page(sg)) + sg->offset; 497 WARN_ON(sg->length + sg->offset > PAGE_SIZE); 498 from = (void *) udev->mb_addr + udev->data_off + udev->data_tail; 499 tcmu_flush_dcache_range(from, copy_bytes); 500 memcpy(to, from, copy_bytes); 501 502 UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size); 503 504 /* Uh oh, wrapped the data buffer for this sg's data */ 505 if (sg->length != copy_bytes) { 506 from = (void *) udev->mb_addr + udev->data_off + udev->data_tail; 507 WARN_ON(udev->data_tail); 508 to += copy_bytes; 509 copy_bytes = sg->length - copy_bytes; 510 tcmu_flush_dcache_range(from, copy_bytes); 511 memcpy(to, from, copy_bytes); 512 513 UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size); 514 } 515 516 kunmap_atomic(to); 517 } 518 519 } else if (se_cmd->data_direction == DMA_TO_DEVICE) { 520 UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size); 521 } else { 522 pr_warn("TCMU: data direction was %d!\n", se_cmd->data_direction); 523 } 524 525 target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status); 526 cmd->se_cmd = NULL; 527 528 kmem_cache_free(tcmu_cmd_cache, cmd); 529} 530 531static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) 532{ 533 struct tcmu_mailbox *mb; 534 LIST_HEAD(cpl_cmds); 535 unsigned long flags; 536 int handled = 0; 537 538 if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) { 539 pr_err("ring broken, not handling completions\n"); 540 return 0; 541 } 542 543 spin_lock_irqsave(&udev->cmdr_lock, flags); 544 545 mb = udev->mb_addr; 546 tcmu_flush_dcache_range(mb, sizeof(*mb)); 547 548 while (udev->cmdr_last_cleaned != ACCESS_ONCE(mb->cmd_tail)) { 549 550 struct tcmu_cmd_entry *entry = (void *) mb + CMDR_OFF + udev->cmdr_last_cleaned; 551 struct tcmu_cmd *cmd; 552 553 tcmu_flush_dcache_range(entry, sizeof(*entry)); 554 555 if (tcmu_hdr_get_op(entry->hdr.len_op) == TCMU_OP_PAD) { 556 UPDATE_HEAD(udev->cmdr_last_cleaned, 557 tcmu_hdr_get_len(entry->hdr.len_op), 558 udev->cmdr_size); 559 continue; 560 } 561 WARN_ON(tcmu_hdr_get_op(entry->hdr.len_op) != TCMU_OP_CMD); 562 563 spin_lock(&udev->commands_lock); 564 cmd = idr_find(&udev->commands, entry->hdr.cmd_id); 565 if (cmd) 566 idr_remove(&udev->commands, cmd->cmd_id); 567 spin_unlock(&udev->commands_lock); 568 569 if (!cmd) { 570 pr_err("cmd_id not found, ring is broken\n"); 571 set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags); 572 break; 573 } 574 575 tcmu_handle_completion(cmd, entry); 576 577 UPDATE_HEAD(udev->cmdr_last_cleaned, 578 tcmu_hdr_get_len(entry->hdr.len_op), 579 udev->cmdr_size); 580 581 handled++; 582 } 583 584 if (mb->cmd_tail == mb->cmd_head) 585 del_timer(&udev->timeout); /* no more pending cmds */ 586 587 spin_unlock_irqrestore(&udev->cmdr_lock, flags); 588 589 wake_up(&udev->wait_cmdr); 590 591 return handled; 592} 593 594static int tcmu_check_expired_cmd(int id, void *p, void *data) 595{ 596 struct tcmu_cmd *cmd = p; 597 598 if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) 599 return 0; 600 601 if (!time_after(cmd->deadline, jiffies)) 602 return 0; 603 604 set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags); 605 target_complete_cmd(cmd->se_cmd, SAM_STAT_CHECK_CONDITION); 606 cmd->se_cmd = NULL; 607 608 kmem_cache_free(tcmu_cmd_cache, cmd); 609 610 return 0; 611} 612 613static void tcmu_device_timedout(unsigned long data) 614{ 615 struct tcmu_dev *udev = (struct tcmu_dev *)data; 616 unsigned long flags; 617 int handled; 618 619 handled = tcmu_handle_completions(udev); 620 621 pr_warn("%d completions handled from timeout\n", handled); 622 623 spin_lock_irqsave(&udev->commands_lock, flags); 624 idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL); 625 spin_unlock_irqrestore(&udev->commands_lock, flags); 626 627 /* 628 * We don't need to wakeup threads on wait_cmdr since they have their 629 * own timeout. 630 */ 631} 632 633static int tcmu_attach_hba(struct se_hba *hba, u32 host_id) 634{ 635 struct tcmu_hba *tcmu_hba; 636 637 tcmu_hba = kzalloc(sizeof(struct tcmu_hba), GFP_KERNEL); 638 if (!tcmu_hba) 639 return -ENOMEM; 640 641 tcmu_hba->host_id = host_id; 642 hba->hba_ptr = tcmu_hba; 643 644 return 0; 645} 646 647static void tcmu_detach_hba(struct se_hba *hba) 648{ 649 kfree(hba->hba_ptr); 650 hba->hba_ptr = NULL; 651} 652 653static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) 654{ 655 struct tcmu_dev *udev; 656 657 udev = kzalloc(sizeof(struct tcmu_dev), GFP_KERNEL); 658 if (!udev) 659 return NULL; 660 661 udev->name = kstrdup(name, GFP_KERNEL); 662 if (!udev->name) { 663 kfree(udev); 664 return NULL; 665 } 666 667 udev->hba = hba; 668 669 init_waitqueue_head(&udev->wait_cmdr); 670 spin_lock_init(&udev->cmdr_lock); 671 672 idr_init(&udev->commands); 673 spin_lock_init(&udev->commands_lock); 674 675 setup_timer(&udev->timeout, tcmu_device_timedout, 676 (unsigned long)udev); 677 678 return &udev->se_dev; 679} 680 681static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on) 682{ 683 struct tcmu_dev *tcmu_dev = container_of(info, struct tcmu_dev, uio_info); 684 685 tcmu_handle_completions(tcmu_dev); 686 687 return 0; 688} 689 690/* 691 * mmap code from uio.c. Copied here because we want to hook mmap() 692 * and this stuff must come along. 693 */ 694static int tcmu_find_mem_index(struct vm_area_struct *vma) 695{ 696 struct tcmu_dev *udev = vma->vm_private_data; 697 struct uio_info *info = &udev->uio_info; 698 699 if (vma->vm_pgoff < MAX_UIO_MAPS) { 700 if (info->mem[vma->vm_pgoff].size == 0) 701 return -1; 702 return (int)vma->vm_pgoff; 703 } 704 return -1; 705} 706 707static int tcmu_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 708{ 709 struct tcmu_dev *udev = vma->vm_private_data; 710 struct uio_info *info = &udev->uio_info; 711 struct page *page; 712 unsigned long offset; 713 void *addr; 714 715 int mi = tcmu_find_mem_index(vma); 716 if (mi < 0) 717 return VM_FAULT_SIGBUS; 718 719 /* 720 * We need to subtract mi because userspace uses offset = N*PAGE_SIZE 721 * to use mem[N]. 722 */ 723 offset = (vmf->pgoff - mi) << PAGE_SHIFT; 724 725 addr = (void *)(unsigned long)info->mem[mi].addr + offset; 726 if (info->mem[mi].memtype == UIO_MEM_LOGICAL) 727 page = virt_to_page(addr); 728 else 729 page = vmalloc_to_page(addr); 730 get_page(page); 731 vmf->page = page; 732 return 0; 733} 734 735static const struct vm_operations_struct tcmu_vm_ops = { 736 .fault = tcmu_vma_fault, 737}; 738 739static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma) 740{ 741 struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info); 742 743 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; 744 vma->vm_ops = &tcmu_vm_ops; 745 746 vma->vm_private_data = udev; 747 748 /* Ensure the mmap is exactly the right size */ 749 if (vma_pages(vma) != (TCMU_RING_SIZE >> PAGE_SHIFT)) 750 return -EINVAL; 751 752 return 0; 753} 754 755static int tcmu_open(struct uio_info *info, struct inode *inode) 756{ 757 struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info); 758 759 /* O_EXCL not supported for char devs, so fake it? */ 760 if (test_and_set_bit(TCMU_DEV_BIT_OPEN, &udev->flags)) 761 return -EBUSY; 762 763 pr_debug("open\n"); 764 765 return 0; 766} 767 768static int tcmu_release(struct uio_info *info, struct inode *inode) 769{ 770 struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info); 771 772 clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags); 773 774 pr_debug("close\n"); 775 776 return 0; 777} 778 779static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name, int minor) 780{ 781 struct sk_buff *skb; 782 void *msg_header; 783 int ret = -ENOMEM; 784 785 skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); 786 if (!skb) 787 return ret; 788 789 msg_header = genlmsg_put(skb, 0, 0, &tcmu_genl_family, 0, cmd); 790 if (!msg_header) 791 goto free_skb; 792 793 ret = nla_put_string(skb, TCMU_ATTR_DEVICE, name); 794 if (ret < 0) 795 goto free_skb; 796 797 ret = nla_put_u32(skb, TCMU_ATTR_MINOR, minor); 798 if (ret < 0) 799 goto free_skb; 800 801 genlmsg_end(skb, msg_header); 802 803 ret = genlmsg_multicast(&tcmu_genl_family, skb, 0, 804 TCMU_MCGRP_CONFIG, GFP_KERNEL); 805 806 /* We don't care if no one is listening */ 807 if (ret == -ESRCH) 808 ret = 0; 809 810 return ret; 811free_skb: 812 nlmsg_free(skb); 813 return ret; 814} 815 816static int tcmu_configure_device(struct se_device *dev) 817{ 818 struct tcmu_dev *udev = TCMU_DEV(dev); 819 struct tcmu_hba *hba = udev->hba->hba_ptr; 820 struct uio_info *info; 821 struct tcmu_mailbox *mb; 822 size_t size; 823 size_t used; 824 int ret = 0; 825 char *str; 826 827 info = &udev->uio_info; 828 829 size = snprintf(NULL, 0, "tcm-user/%u/%s/%s", hba->host_id, udev->name, 830 udev->dev_config); 831 size += 1; /* for \0 */ 832 str = kmalloc(size, GFP_KERNEL); 833 if (!str) 834 return -ENOMEM; 835 836 used = snprintf(str, size, "tcm-user/%u/%s", hba->host_id, udev->name); 837 838 if (udev->dev_config[0]) 839 snprintf(str + used, size - used, "/%s", udev->dev_config); 840 841 info->name = str; 842 843 udev->mb_addr = vzalloc(TCMU_RING_SIZE); 844 if (!udev->mb_addr) { 845 ret = -ENOMEM; 846 goto err_vzalloc; 847 } 848 849 /* mailbox fits in first part of CMDR space */ 850 udev->cmdr_size = CMDR_SIZE - CMDR_OFF; 851 udev->data_off = CMDR_SIZE; 852 udev->data_size = TCMU_RING_SIZE - CMDR_SIZE; 853 854 mb = udev->mb_addr; 855 mb->version = TCMU_MAILBOX_VERSION; 856 mb->cmdr_off = CMDR_OFF; 857 mb->cmdr_size = udev->cmdr_size; 858 859 WARN_ON(!PAGE_ALIGNED(udev->data_off)); 860 WARN_ON(udev->data_size % PAGE_SIZE); 861 862 info->version = xstr(TCMU_MAILBOX_VERSION); 863 864 info->mem[0].name = "tcm-user command & data buffer"; 865 info->mem[0].addr = (phys_addr_t) udev->mb_addr; 866 info->mem[0].size = TCMU_RING_SIZE; 867 info->mem[0].memtype = UIO_MEM_VIRTUAL; 868 869 info->irqcontrol = tcmu_irqcontrol; 870 info->irq = UIO_IRQ_CUSTOM; 871 872 info->mmap = tcmu_mmap; 873 info->open = tcmu_open; 874 info->release = tcmu_release; 875 876 ret = uio_register_device(tcmu_root_device, info); 877 if (ret) 878 goto err_register; 879 880 /* Other attributes can be configured in userspace */ 881 dev->dev_attrib.hw_block_size = 512; 882 dev->dev_attrib.hw_max_sectors = 128; 883 dev->dev_attrib.hw_queue_depth = 128; 884 885 ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name, 886 udev->uio_info.uio_dev->minor); 887 if (ret) 888 goto err_netlink; 889 890 return 0; 891 892err_netlink: 893 uio_unregister_device(&udev->uio_info); 894err_register: 895 vfree(udev->mb_addr); 896err_vzalloc: 897 kfree(info->name); 898 899 return ret; 900} 901 902static int tcmu_check_pending_cmd(int id, void *p, void *data) 903{ 904 struct tcmu_cmd *cmd = p; 905 906 if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) 907 return 0; 908 return -EINVAL; 909} 910 911static void tcmu_free_device(struct se_device *dev) 912{ 913 struct tcmu_dev *udev = TCMU_DEV(dev); 914 int i; 915 916 del_timer_sync(&udev->timeout); 917 918 vfree(udev->mb_addr); 919 920 /* Upper layer should drain all requests before calling this */ 921 spin_lock_irq(&udev->commands_lock); 922 i = idr_for_each(&udev->commands, tcmu_check_pending_cmd, NULL); 923 idr_destroy(&udev->commands); 924 spin_unlock_irq(&udev->commands_lock); 925 WARN_ON(i); 926 927 /* Device was configured */ 928 if (udev->uio_info.uio_dev) { 929 tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name, 930 udev->uio_info.uio_dev->minor); 931 932 uio_unregister_device(&udev->uio_info); 933 kfree(udev->uio_info.name); 934 kfree(udev->name); 935 } 936 937 kfree(udev); 938} 939 940enum { 941 Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_err, 942}; 943 944static match_table_t tokens = { 945 {Opt_dev_config, "dev_config=%s"}, 946 {Opt_dev_size, "dev_size=%u"}, 947 {Opt_hw_block_size, "hw_block_size=%u"}, 948 {Opt_err, NULL} 949}; 950 951static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, 952 const char *page, ssize_t count) 953{ 954 struct tcmu_dev *udev = TCMU_DEV(dev); 955 char *orig, *ptr, *opts, *arg_p; 956 substring_t args[MAX_OPT_ARGS]; 957 int ret = 0, token; 958 unsigned long tmp_ul; 959 960 opts = kstrdup(page, GFP_KERNEL); 961 if (!opts) 962 return -ENOMEM; 963 964 orig = opts; 965 966 while ((ptr = strsep(&opts, ",\n")) != NULL) { 967 if (!*ptr) 968 continue; 969 970 token = match_token(ptr, tokens, args); 971 switch (token) { 972 case Opt_dev_config: 973 if (match_strlcpy(udev->dev_config, &args[0], 974 TCMU_CONFIG_LEN) == 0) { 975 ret = -EINVAL; 976 break; 977 } 978 pr_debug("TCMU: Referencing Path: %s\n", udev->dev_config); 979 break; 980 case Opt_dev_size: 981 arg_p = match_strdup(&args[0]); 982 if (!arg_p) { 983 ret = -ENOMEM; 984 break; 985 } 986 ret = kstrtoul(arg_p, 0, (unsigned long *) &udev->dev_size); 987 kfree(arg_p); 988 if (ret < 0) 989 pr_err("kstrtoul() failed for dev_size=\n"); 990 break; 991 case Opt_hw_block_size: 992 arg_p = match_strdup(&args[0]); 993 if (!arg_p) { 994 ret = -ENOMEM; 995 break; 996 } 997 ret = kstrtoul(arg_p, 0, &tmp_ul); 998 kfree(arg_p); 999 if (ret < 0) { 1000 pr_err("kstrtoul() failed for hw_block_size=\n"); 1001 break; 1002 } 1003 if (!tmp_ul) { 1004 pr_err("hw_block_size must be nonzero\n"); 1005 break; 1006 } 1007 dev->dev_attrib.hw_block_size = tmp_ul; 1008 break; 1009 default: 1010 break; 1011 } 1012 } 1013 1014 kfree(orig); 1015 return (!ret) ? count : ret; 1016} 1017 1018static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b) 1019{ 1020 struct tcmu_dev *udev = TCMU_DEV(dev); 1021 ssize_t bl = 0; 1022 1023 bl = sprintf(b + bl, "Config: %s ", 1024 udev->dev_config[0] ? udev->dev_config : "NULL"); 1025 bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size); 1026 1027 return bl; 1028} 1029 1030static sector_t tcmu_get_blocks(struct se_device *dev) 1031{ 1032 struct tcmu_dev *udev = TCMU_DEV(dev); 1033 1034 return div_u64(udev->dev_size - dev->dev_attrib.block_size, 1035 dev->dev_attrib.block_size); 1036} 1037 1038static sense_reason_t 1039tcmu_pass_op(struct se_cmd *se_cmd) 1040{ 1041 int ret = tcmu_queue_cmd(se_cmd); 1042 1043 if (ret != 0) 1044 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 1045 else 1046 return TCM_NO_SENSE; 1047} 1048 1049static sense_reason_t 1050tcmu_parse_cdb(struct se_cmd *cmd) 1051{ 1052 return passthrough_parse_cdb(cmd, tcmu_pass_op); 1053} 1054 1055DEF_TB_DEV_ATTRIB_RO(tcmu, hw_pi_prot_type); 1056TB_DEV_ATTR_RO(tcmu, hw_pi_prot_type); 1057 1058DEF_TB_DEV_ATTRIB_RO(tcmu, hw_block_size); 1059TB_DEV_ATTR_RO(tcmu, hw_block_size); 1060 1061DEF_TB_DEV_ATTRIB_RO(tcmu, hw_max_sectors); 1062TB_DEV_ATTR_RO(tcmu, hw_max_sectors); 1063 1064DEF_TB_DEV_ATTRIB_RO(tcmu, hw_queue_depth); 1065TB_DEV_ATTR_RO(tcmu, hw_queue_depth); 1066 1067static struct configfs_attribute *tcmu_backend_dev_attrs[] = { 1068 &tcmu_dev_attrib_hw_pi_prot_type.attr, 1069 &tcmu_dev_attrib_hw_block_size.attr, 1070 &tcmu_dev_attrib_hw_max_sectors.attr, 1071 &tcmu_dev_attrib_hw_queue_depth.attr, 1072 NULL, 1073}; 1074 1075static struct se_subsystem_api tcmu_template = { 1076 .name = "user", 1077 .inquiry_prod = "USER", 1078 .inquiry_rev = TCMU_VERSION, 1079 .owner = THIS_MODULE, 1080 .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, 1081 .attach_hba = tcmu_attach_hba, 1082 .detach_hba = tcmu_detach_hba, 1083 .alloc_device = tcmu_alloc_device, 1084 .configure_device = tcmu_configure_device, 1085 .free_device = tcmu_free_device, 1086 .parse_cdb = tcmu_parse_cdb, 1087 .set_configfs_dev_params = tcmu_set_configfs_dev_params, 1088 .show_configfs_dev_params = tcmu_show_configfs_dev_params, 1089 .get_device_type = sbc_get_device_type, 1090 .get_blocks = tcmu_get_blocks, 1091}; 1092 1093static int __init tcmu_module_init(void) 1094{ 1095 struct target_backend_cits *tbc = &tcmu_template.tb_cits; 1096 int ret; 1097 1098 BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0); 1099 1100 tcmu_cmd_cache = kmem_cache_create("tcmu_cmd_cache", 1101 sizeof(struct tcmu_cmd), 1102 __alignof__(struct tcmu_cmd), 1103 0, NULL); 1104 if (!tcmu_cmd_cache) 1105 return -ENOMEM; 1106 1107 tcmu_root_device = root_device_register("tcm_user"); 1108 if (IS_ERR(tcmu_root_device)) { 1109 ret = PTR_ERR(tcmu_root_device); 1110 goto out_free_cache; 1111 } 1112 1113 ret = genl_register_family(&tcmu_genl_family); 1114 if (ret < 0) { 1115 goto out_unreg_device; 1116 } 1117 1118 target_core_setup_sub_cits(&tcmu_template); 1119 tbc->tb_dev_attrib_cit.ct_attrs = tcmu_backend_dev_attrs; 1120 1121 ret = transport_subsystem_register(&tcmu_template); 1122 if (ret) 1123 goto out_unreg_genl; 1124 1125 return 0; 1126 1127out_unreg_genl: 1128 genl_unregister_family(&tcmu_genl_family); 1129out_unreg_device: 1130 root_device_unregister(tcmu_root_device); 1131out_free_cache: 1132 kmem_cache_destroy(tcmu_cmd_cache); 1133 1134 return ret; 1135} 1136 1137static void __exit tcmu_module_exit(void) 1138{ 1139 transport_subsystem_release(&tcmu_template); 1140 genl_unregister_family(&tcmu_genl_family); 1141 root_device_unregister(tcmu_root_device); 1142 kmem_cache_destroy(tcmu_cmd_cache); 1143} 1144 1145MODULE_DESCRIPTION("TCM USER subsystem plugin"); 1146MODULE_AUTHOR("Shaohua Li <shli@kernel.org>"); 1147MODULE_AUTHOR("Andy Grover <agrover@redhat.com>"); 1148MODULE_LICENSE("GPL"); 1149 1150module_init(tcmu_module_init); 1151module_exit(tcmu_module_exit); 1152