/*
 * nvme-lightnvm.c - LightNVM NVMe device
 *
 * Copyright (C) 2014-2015 IT University of Copenhagen
 * Initial release: Matias Bjorling <mb@lightnvm.io>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING. If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 * USA.
 *
 */

#include "nvme.h"

#include <linux/nvme.h>
#include <linux/bitops.h>
#include <linux/lightnvm.h>
#include <linux/vmalloc.h>

enum nvme_nvm_admin_opcode {
	nvme_nvm_admin_identity		= 0xe2,
	nvme_nvm_admin_get_l2p_tbl	= 0xea,
	nvme_nvm_admin_get_bb_tbl	= 0xf2,
	nvme_nvm_admin_set_bb_tbl	= 0xf1,
};

struct nvme_nvm_hb_rw {
	__u8 opcode;
	__u8 flags;
	__u16 command_id;
	__le32 nsid;
	__u64 rsvd2;
	__le64 metadata;
	__le64 prp1;
	__le64 prp2;
	__le64 spba;
	__le16 length;
	__le16 control;
	__le32 dsmgmt;
	__le64 slba;
};

struct nvme_nvm_ph_rw {
	__u8 opcode;
	__u8 flags;
	__u16 command_id;
	__le32 nsid;
	__u64 rsvd2;
	__le64 metadata;
	__le64 prp1;
	__le64 prp2;
	__le64 spba;
	__le16 length;
	__le16 control;
	__le32 dsmgmt;
	__le64 resv;
};

struct nvme_nvm_identity {
	__u8 opcode;
	__u8 flags;
	__u16 command_id;
	__le32 nsid;
	__u64 rsvd[2];
	__le64 prp1;
	__le64 prp2;
	__le32 chnl_off;
	__u32 rsvd11[5];
};

struct nvme_nvm_l2ptbl {
	__u8 opcode;
	__u8 flags;
	__u16 command_id;
	__le32 nsid;
	__le32 cdw2[4];
	__le64 prp1;
	__le64 prp2;
	__le64 slba;
	__le32 nlb;
	__le16 cdw14[6];
};

struct nvme_nvm_getbbtbl {
	__u8 opcode;
	__u8 flags;
	__u16 command_id;
	__le32 nsid;
	__u64 rsvd[2];
	__le64 prp1;
	__le64 prp2;
	__le64 spba;
	__u32 rsvd4[4];
};

struct nvme_nvm_setbbtbl {
	__u8 opcode;
	__u8 flags;
	__u16 command_id;
	__le32 nsid;
	__le64 rsvd[2];
	__le64 prp1;
	__le64 prp2;
	__le64 spba;
	__le16 nlb;
	__u8 value;
	__u8 rsvd3;
	__u32 rsvd4[3];
};

struct nvme_nvm_erase_blk {
	__u8 opcode;
	__u8 flags;
	__u16 command_id;
	__le32 nsid;
	__u64 rsvd[2];
	__le64 prp1;
	__le64 prp2;
	__le64 spba;
	__le16 length;
	__le16 control;
	__le32 dsmgmt;
	__le64 resv;
};

struct nvme_nvm_command {
	union {
		struct nvme_common_command common;
		struct nvme_nvm_identity identity;
		struct nvme_nvm_hb_rw hb_rw;
		struct nvme_nvm_ph_rw ph_rw;
		struct nvme_nvm_l2ptbl l2p;
		struct nvme_nvm_getbbtbl get_bb;
		struct nvme_nvm_setbbtbl set_bb;
		struct nvme_nvm_erase_blk erase;
	};
};

struct nvme_nvm_id_group {
	__u8 mtype;
	__u8 fmtype;
	__le16 res16;
	__u8 num_ch;
	__u8 num_lun;
	__u8 num_pln;
	__u8 rsvd1;
	__le16 num_blk;
	__le16 num_pg;
	__le16 fpg_sz;
	__le16 csecs;
	__le16 sos;
	__le16 rsvd2;
	__le32 trdt;
	__le32 trdm;
	__le32 tprt;
	__le32 tprm;
	__le32 tbet;
	__le32 tbem;
	__le32 mpos;
	__le32 mccap;
	__le16 cpar;
	__u8 reserved[906];
} __packed;

struct nvme_nvm_addr_format {
	__u8 ch_offset;
	__u8 ch_len;
	__u8 lun_offset;
	__u8 lun_len;
	__u8 pln_offset;
	__u8 pln_len;
	__u8 blk_offset;
	__u8 blk_len;
	__u8 pg_offset;
	__u8 pg_len;
	__u8 sect_offset;
	__u8 sect_len;
	__u8 res[4];
} __packed;

struct nvme_nvm_id {
	__u8 ver_id;
	__u8 vmnt;
	__u8 cgrps;
	__u8 res;
	__le32 cap;
	__le32 dom;
	struct nvme_nvm_addr_format ppaf;
	__u8 resv[228];
	struct nvme_nvm_id_group groups[4];
} __packed;

struct nvme_nvm_bb_tbl {
	__u8 tblid[4];
	__le16 verid;
	__le16 revid;
	__le32 rvsd1;
	__le32 tblks;
	__le32 tfact;
	__le32 tgrown;
	__le32 tdresv;
	__le32 thresv;
	__le32 rsvd2[8];
	__u8 blk[0];
};

/*
 * Check we didn't inadvertently grow the command struct
 */
static inline void _nvme_nvm_check_size(void)
{
	BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
}

static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
{
	struct nvme_nvm_id_group *src;
	struct nvm_id_group *dst;
	int i, end;

	end = min_t(u32, 4, nvm_id->cgrps);

	for (i = 0; i < end; i++) {
		src = &nvme_nvm_id->groups[i];
		dst = &nvm_id->groups[i];

		dst->mtype = src->mtype;
		dst->fmtype = src->fmtype;
		dst->num_ch = src->num_ch;
		dst->num_lun = src->num_lun;
		dst->num_pln = src->num_pln;

		dst->num_pg = le16_to_cpu(src->num_pg);
		dst->num_blk = le16_to_cpu(src->num_blk);
		dst->fpg_sz = le16_to_cpu(src->fpg_sz);
		dst->csecs = le16_to_cpu(src->csecs);
		dst->sos = le16_to_cpu(src->sos);

		dst->trdt = le32_to_cpu(src->trdt);
		dst->trdm = le32_to_cpu(src->trdm);
		dst->tprt = le32_to_cpu(src->tprt);
		dst->tprm = le32_to_cpu(src->tprm);
		dst->tbet = le32_to_cpu(src->tbet);
		dst->tbem = le32_to_cpu(src->tbem);
		dst->mpos = le32_to_cpu(src->mpos);
		dst->mccap = le32_to_cpu(src->mccap);

		dst->cpar = le16_to_cpu(src->cpar);
	}

	return 0;
}

static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_dev *dev = ns->dev;
	struct nvme_nvm_id *nvme_nvm_id;
	struct nvme_nvm_command c = {};
	int ret;

	c.identity.opcode = nvme_nvm_admin_identity;
	c.identity.nsid = cpu_to_le32(ns->ns_id);
	c.identity.chnl_off = 0;

	nvme_nvm_id = kmalloc(sizeof(struct nvme_nvm_id), GFP_KERNEL);
	if (!nvme_nvm_id)
		return -ENOMEM;

	ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
				nvme_nvm_id, sizeof(struct nvme_nvm_id));
	if (ret) {
		ret = -EIO;
		goto out;
	}

	nvm_id->ver_id = nvme_nvm_id->ver_id;
	nvm_id->vmnt = nvme_nvm_id->vmnt;
	nvm_id->cgrps = nvme_nvm_id->cgrps;
	nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
	nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
	memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
				sizeof(struct nvme_nvm_addr_format));

	ret = init_grps(nvm_id, nvme_nvm_id);
out:
	kfree(nvme_nvm_id);
	return ret;
}

static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
				nvm_l2p_update_fn *update_l2p, void *priv)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_dev *dev = ns->dev;
	struct nvme_nvm_command c = {};
	u32 len = queue_max_hw_sectors(dev->admin_q) << 9;
	u32 nlb_pr_rq = len / sizeof(u64);
	u64 cmd_slba = slba;
	void *entries;
	int ret = 0;

	c.l2p.opcode = nvme_nvm_admin_get_l2p_tbl;
	c.l2p.nsid = cpu_to_le32(ns->ns_id);
	entries = kmalloc(len, GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	while (nlb) {
		u32 cmd_nlb = min(nlb_pr_rq, nlb);

		c.l2p.slba = cpu_to_le64(cmd_slba);
		c.l2p.nlb = cpu_to_le32(cmd_nlb);

		ret = nvme_submit_sync_cmd(dev->admin_q,
				(struct nvme_command *)&c, entries, len);
		if (ret) {
			dev_err(dev->dev, "L2P table transfer failed (%d)\n",
									ret);
			ret = -EIO;
			goto out;
		}

		if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) {
			ret = -EINTR;
			goto out;
		}

		cmd_slba += cmd_nlb;
		nlb -= cmd_nlb;
	}

out:
	kfree(entries);
	return ret;
}

static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
				int nr_blocks, nvm_bb_update_fn *update_bbtbl,
				void *priv)
{
	struct request_queue *q = nvmdev->q;
	struct nvme_ns *ns = q->queuedata;
	struct nvme_dev *dev = ns->dev;
	struct nvme_nvm_command c = {};
	struct nvme_nvm_bb_tbl *bb_tbl;
	int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks;
	int ret = 0;

	c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
	c.get_bb.nsid = cpu_to_le32(ns->ns_id);
	c.get_bb.spba = cpu_to_le64(ppa.ppa);

	bb_tbl = kzalloc(tblsz, GFP_KERNEL);
	if (!bb_tbl)
		return -ENOMEM;

	ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
								bb_tbl, tblsz);
	if (ret) {
		dev_err(dev->dev, "get bad block table failed (%d)\n", ret);
		ret = -EIO;
		goto out;
	}

	if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
		bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
		dev_err(dev->dev, "bbt format mismatch\n");
		ret = -EINVAL;
		goto out;
	}

	if (le16_to_cpu(bb_tbl->verid) != 1) {
		ret = -EINVAL;
		dev_err(dev->dev, "bbt version not supported\n");
		goto out;
	}

	if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) {
		ret = -EINVAL;
		dev_err(dev->dev, "bbt unexpected blocks returned (%u!=%u)\n",
				le32_to_cpu(bb_tbl->tblks), nr_blocks);
		goto out;
	}

	ppa = dev_to_generic_addr(nvmdev, ppa);
	ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv);
	if (ret) {
		ret = -EINTR;
		goto out;
	}

out:
	kfree(bb_tbl);
	return ret;
}

static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct nvm_rq *rqd,
								int type)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_dev *dev = ns->dev;
	struct nvme_nvm_command c = {};
	int ret = 0;

	c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl;
	c.set_bb.nsid = cpu_to_le32(ns->ns_id);
	c.set_bb.spba = cpu_to_le64(rqd->ppa_addr.ppa);
	c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1);
	c.set_bb.value = type;

	ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
								NULL, 0);
	if (ret)
		dev_err(dev->dev, "set bad block table failed (%d)\n", ret);
	return ret;
}

static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
				struct nvme_ns *ns, struct nvme_nvm_command *c)
{
	c->ph_rw.opcode = rqd->opcode;
	c->ph_rw.nsid = cpu_to_le32(ns->ns_id);
	c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa);
	c->ph_rw.control = cpu_to_le16(rqd->flags);
	c->ph_rw.length = cpu_to_le16(rqd->nr_pages - 1);

	if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD)
		c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns,
						rqd->bio->bi_iter.bi_sector));
}

static void nvme_nvm_end_io(struct request *rq, int error)
{
	struct nvm_rq *rqd = rq->end_io_data;
	struct nvm_dev *dev = rqd->dev;

	if (dev->mt && dev->mt->end_io(rqd, error))
		pr_err("nvme: err status: %x result: %lx\n",
				rq->errors, (unsigned long)rq->special);

	kfree(rq->cmd);
	blk_mq_free_request(rq);
}

static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	struct request_queue *q = dev->q;
	struct nvme_ns *ns = q->queuedata;
	struct request *rq;
	struct bio *bio = rqd->bio;
	struct nvme_nvm_command *cmd;

	rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
	if (IS_ERR(rq))
		return -ENOMEM;

	cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
	if (!cmd) {
		blk_mq_free_request(rq);
		return -ENOMEM;
	}

	rq->cmd_type = REQ_TYPE_DRV_PRIV;
	rq->ioprio = bio_prio(bio);

	if (bio_has_data(bio))
		rq->nr_phys_segments = bio_phys_segments(q, bio);

	rq->__data_len = bio->bi_iter.bi_size;
	rq->bio = rq->biotail = bio;

	nvme_nvm_rqtocmd(rq, rqd, ns, cmd);

	rq->cmd = (unsigned char *)cmd;
	rq->cmd_len = sizeof(struct nvme_nvm_command);
	rq->special = (void *)0;

	rq->end_io_data = rqd;

	blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io);

	return 0;
}

static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	struct request_queue *q = dev->q;
	struct nvme_ns *ns = q->queuedata;
	struct nvme_nvm_command c = {};

	c.erase.opcode = NVM_OP_ERASE;
	c.erase.nsid = cpu_to_le32(ns->ns_id);
	c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa);
	c.erase.length = cpu_to_le16(rqd->nr_pages - 1);

	return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0);
}

static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_dev *dev = ns->dev;

	return dma_pool_create(name, dev->dev, PAGE_SIZE, PAGE_SIZE, 0);
}

static void nvme_nvm_destroy_dma_pool(void *pool)
{
	struct dma_pool *dma_pool = pool;

	dma_pool_destroy(dma_pool);
}

static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
				    gfp_t mem_flags, dma_addr_t *dma_handler)
{
	return dma_pool_alloc(pool, mem_flags, dma_handler);
}

static void nvme_nvm_dev_dma_free(void *pool, void *ppa_list,
				  dma_addr_t dma_handler)
{
	dma_pool_free(pool, ppa_list, dma_handler);
}

static struct nvm_dev_ops nvme_nvm_dev_ops = {
	.identity = nvme_nvm_identity,

	.get_l2p_tbl = nvme_nvm_get_l2p_tbl,

	.get_bb_tbl = nvme_nvm_get_bb_tbl,
	.set_bb_tbl = nvme_nvm_set_bb_tbl,

	.submit_io = nvme_nvm_submit_io,
	.erase_block = nvme_nvm_erase_block,

	.create_dma_pool = nvme_nvm_create_dma_pool,
	.destroy_dma_pool = nvme_nvm_destroy_dma_pool,
	.dev_dma_alloc = nvme_nvm_dev_dma_alloc,
	.dev_dma_free = nvme_nvm_dev_dma_free,

	.max_phys_sect = 64,
};

int nvme_nvm_register(struct request_queue *q, char *disk_name)
{
	return nvm_register(q, disk_name, &nvme_nvm_dev_ops);
}

void nvme_nvm_unregister(struct request_queue *q, char *disk_name)
{
	nvm_unregister(disk_name);
}

/* move to shared place when used in multiple places. */
#define PCI_VENDOR_ID_CNEX 0x1d1d
#define PCI_DEVICE_ID_CNEX_WL 0x2807
#define PCI_DEVICE_ID_CNEX_QEMU 0x1f1f

int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
	struct nvme_dev *dev = ns->dev;
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	/* QEMU NVMe simulator - PCI ID + Vendor specific bit */
	if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
				pdev->device == PCI_DEVICE_ID_CNEX_QEMU &&
							id->vs[0] == 0x1)
		return 1;

	/* CNEX Labs - PCI ID + Vendor specific bit */
	if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
				pdev->device == PCI_DEVICE_ID_CNEX_WL &&
							id->vs[0] == 0x1)
		return 1;

	return 0;
}