1/* 2 * Swap block device support for MTDs 3 * Turns an MTD device into a swap device with block wear leveling 4 * 5 * Copyright © 2007,2011 Nokia Corporation. All rights reserved. 6 * 7 * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com> 8 * 9 * Based on Richard Purdie's earlier implementation in 2007. Background 10 * support and lock-less operation written by Adrian Hunter. 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * version 2 as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, but 17 * WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, write to the Free Software 23 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 24 * 02110-1301 USA 25 */ 26 27#include <linux/kernel.h> 28#include <linux/module.h> 29#include <linux/mtd/mtd.h> 30#include <linux/mtd/blktrans.h> 31#include <linux/rbtree.h> 32#include <linux/sched.h> 33#include <linux/slab.h> 34#include <linux/vmalloc.h> 35#include <linux/genhd.h> 36#include <linux/swap.h> 37#include <linux/debugfs.h> 38#include <linux/seq_file.h> 39#include <linux/device.h> 40#include <linux/math64.h> 41 42#define MTDSWAP_PREFIX "mtdswap" 43 44/* 45 * The number of free eraseblocks when GC should stop 46 */ 47#define CLEAN_BLOCK_THRESHOLD 20 48 49/* 50 * Number of free eraseblocks below which GC can also collect low frag 51 * blocks. 52 */ 53#define LOW_FRAG_GC_TRESHOLD 5 54 55/* 56 * Wear level cost amortization. We want to do wear leveling on the background 57 * without disturbing gc too much. This is made by defining max GC frequency. 58 * Frequency value 6 means 1/6 of the GC passes will pick an erase block based 59 * on the biggest wear difference rather than the biggest dirtiness. 60 * 61 * The lower freq2 should be chosen so that it makes sure the maximum erase 62 * difference will decrease even if a malicious application is deliberately 63 * trying to make erase differences large. 64 */ 65#define MAX_ERASE_DIFF 4000 66#define COLLECT_NONDIRTY_BASE MAX_ERASE_DIFF 67#define COLLECT_NONDIRTY_FREQ1 6 68#define COLLECT_NONDIRTY_FREQ2 4 69 70#define PAGE_UNDEF UINT_MAX 71#define BLOCK_UNDEF UINT_MAX 72#define BLOCK_ERROR (UINT_MAX - 1) 73#define BLOCK_MAX (UINT_MAX - 2) 74 75#define EBLOCK_BAD (1 << 0) 76#define EBLOCK_NOMAGIC (1 << 1) 77#define EBLOCK_BITFLIP (1 << 2) 78#define EBLOCK_FAILED (1 << 3) 79#define EBLOCK_READERR (1 << 4) 80#define EBLOCK_IDX_SHIFT 5 81 82struct swap_eb { 83 struct rb_node rb; 84 struct rb_root *root; 85 86 unsigned int flags; 87 unsigned int active_count; 88 unsigned int erase_count; 89 unsigned int pad; /* speeds up pointer decrement */ 90}; 91 92#define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \ 93 rb)->erase_count) 94#define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \ 95 rb)->erase_count) 96 97struct mtdswap_tree { 98 struct rb_root root; 99 unsigned int count; 100}; 101 102enum { 103 MTDSWAP_CLEAN, 104 MTDSWAP_USED, 105 MTDSWAP_LOWFRAG, 106 MTDSWAP_HIFRAG, 107 MTDSWAP_DIRTY, 108 MTDSWAP_BITFLIP, 109 MTDSWAP_FAILING, 110 MTDSWAP_TREE_CNT, 111}; 112 113struct mtdswap_dev { 114 struct mtd_blktrans_dev *mbd_dev; 115 struct mtd_info *mtd; 116 struct device *dev; 117 118 unsigned int *page_data; 119 unsigned int *revmap; 120 121 unsigned int eblks; 122 unsigned int spare_eblks; 123 unsigned int pages_per_eblk; 124 unsigned int max_erase_count; 125 struct swap_eb *eb_data; 126 127 struct mtdswap_tree trees[MTDSWAP_TREE_CNT]; 128 129 unsigned long long sect_read_count; 130 unsigned long long sect_write_count; 131 unsigned long long mtd_write_count; 132 unsigned long long mtd_read_count; 133 unsigned long long discard_count; 134 unsigned long long discard_page_count; 135 136 unsigned int curr_write_pos; 137 struct swap_eb *curr_write; 138 139 char *page_buf; 140 char *oob_buf; 141 142 struct dentry *debugfs_root; 143}; 144 145struct mtdswap_oobdata { 146 __le16 magic; 147 __le32 count; 148} __packed; 149 150#define MTDSWAP_MAGIC_CLEAN 0x2095 151#define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1) 152#define MTDSWAP_TYPE_CLEAN 0 153#define MTDSWAP_TYPE_DIRTY 1 154#define MTDSWAP_OOBSIZE sizeof(struct mtdswap_oobdata) 155 156#define MTDSWAP_ERASE_RETRIES 3 /* Before marking erase block bad */ 157#define MTDSWAP_IO_RETRIES 3 158 159enum { 160 MTDSWAP_SCANNED_CLEAN, 161 MTDSWAP_SCANNED_DIRTY, 162 MTDSWAP_SCANNED_BITFLIP, 163 MTDSWAP_SCANNED_BAD, 164}; 165 166/* 167 * In the worst case mtdswap_writesect() has allocated the last clean 168 * page from the current block and is then pre-empted by the GC 169 * thread. The thread can consume a full erase block when moving a 170 * block. 171 */ 172#define MIN_SPARE_EBLOCKS 2 173#define MIN_ERASE_BLOCKS (MIN_SPARE_EBLOCKS + 1) 174 175#define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root) 176#define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL) 177#define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name)) 178#define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count) 179 180#define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv) 181 182static char partitions[128] = ""; 183module_param_string(partitions, partitions, sizeof(partitions), 0444); 184MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap " 185 "partitions=\"1,3,5\""); 186 187static unsigned int spare_eblocks = 10; 188module_param(spare_eblocks, uint, 0444); 189MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for " 190 "garbage collection (default 10%)"); 191 192static bool header; /* false */ 193module_param(header, bool, 0444); 194MODULE_PARM_DESC(header, 195 "Include builtin swap header (default 0, without header)"); 196 197static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background); 198 199static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb) 200{ 201 return (loff_t)(eb - d->eb_data) * d->mtd->erasesize; 202} 203 204static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb) 205{ 206 unsigned int oldidx; 207 struct mtdswap_tree *tp; 208 209 if (eb->root) { 210 tp = container_of(eb->root, struct mtdswap_tree, root); 211 oldidx = tp - &d->trees[0]; 212 213 d->trees[oldidx].count--; 214 rb_erase(&eb->rb, eb->root); 215 } 216} 217 218static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb) 219{ 220 struct rb_node **p, *parent = NULL; 221 struct swap_eb *cur; 222 223 p = &root->rb_node; 224 while (*p) { 225 parent = *p; 226 cur = rb_entry(parent, struct swap_eb, rb); 227 if (eb->erase_count > cur->erase_count) 228 p = &(*p)->rb_right; 229 else 230 p = &(*p)->rb_left; 231 } 232 233 rb_link_node(&eb->rb, parent, p); 234 rb_insert_color(&eb->rb, root); 235} 236 237static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx) 238{ 239 struct rb_root *root; 240 241 if (eb->root == &d->trees[idx].root) 242 return; 243 244 mtdswap_eb_detach(d, eb); 245 root = &d->trees[idx].root; 246 __mtdswap_rb_add(root, eb); 247 eb->root = root; 248 d->trees[idx].count++; 249} 250 251static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx) 252{ 253 struct rb_node *p; 254 unsigned int i; 255 256 p = rb_first(root); 257 i = 0; 258 while (i < idx && p) { 259 p = rb_next(p); 260 i++; 261 } 262 263 return p; 264} 265 266static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb) 267{ 268 int ret; 269 loff_t offset; 270 271 d->spare_eblks--; 272 eb->flags |= EBLOCK_BAD; 273 mtdswap_eb_detach(d, eb); 274 eb->root = NULL; 275 276 /* badblocks not supported */ 277 if (!mtd_can_have_bb(d->mtd)) 278 return 1; 279 280 offset = mtdswap_eb_offset(d, eb); 281 dev_warn(d->dev, "Marking bad block at %08llx\n", offset); 282 ret = mtd_block_markbad(d->mtd, offset); 283 284 if (ret) { 285 dev_warn(d->dev, "Mark block bad failed for block at %08llx " 286 "error %d\n", offset, ret); 287 return ret; 288 } 289 290 return 1; 291 292} 293 294static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb) 295{ 296 unsigned int marked = eb->flags & EBLOCK_FAILED; 297 struct swap_eb *curr_write = d->curr_write; 298 299 eb->flags |= EBLOCK_FAILED; 300 if (curr_write == eb) { 301 d->curr_write = NULL; 302 303 if (!marked && d->curr_write_pos != 0) { 304 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 305 return 0; 306 } 307 } 308 309 return mtdswap_handle_badblock(d, eb); 310} 311 312static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from, 313 struct mtd_oob_ops *ops) 314{ 315 int ret = mtd_read_oob(d->mtd, from, ops); 316 317 if (mtd_is_bitflip(ret)) 318 return ret; 319 320 if (ret) { 321 dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n", 322 ret, from); 323 return ret; 324 } 325 326 if (ops->oobretlen < ops->ooblen) { 327 dev_warn(d->dev, "Read OOB return short read (%zd bytes not " 328 "%zd) for block at %08llx\n", 329 ops->oobretlen, ops->ooblen, from); 330 return -EIO; 331 } 332 333 return 0; 334} 335 336static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb) 337{ 338 struct mtdswap_oobdata *data, *data2; 339 int ret; 340 loff_t offset; 341 struct mtd_oob_ops ops; 342 343 offset = mtdswap_eb_offset(d, eb); 344 345 /* Check first if the block is bad. */ 346 if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset)) 347 return MTDSWAP_SCANNED_BAD; 348 349 ops.ooblen = 2 * d->mtd->ecclayout->oobavail; 350 ops.oobbuf = d->oob_buf; 351 ops.ooboffs = 0; 352 ops.datbuf = NULL; 353 ops.mode = MTD_OPS_AUTO_OOB; 354 355 ret = mtdswap_read_oob(d, offset, &ops); 356 357 if (ret && !mtd_is_bitflip(ret)) 358 return ret; 359 360 data = (struct mtdswap_oobdata *)d->oob_buf; 361 data2 = (struct mtdswap_oobdata *) 362 (d->oob_buf + d->mtd->ecclayout->oobavail); 363 364 if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) { 365 eb->erase_count = le32_to_cpu(data->count); 366 if (mtd_is_bitflip(ret)) 367 ret = MTDSWAP_SCANNED_BITFLIP; 368 else { 369 if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY) 370 ret = MTDSWAP_SCANNED_DIRTY; 371 else 372 ret = MTDSWAP_SCANNED_CLEAN; 373 } 374 } else { 375 eb->flags |= EBLOCK_NOMAGIC; 376 ret = MTDSWAP_SCANNED_DIRTY; 377 } 378 379 return ret; 380} 381 382static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb, 383 u16 marker) 384{ 385 struct mtdswap_oobdata n; 386 int ret; 387 loff_t offset; 388 struct mtd_oob_ops ops; 389 390 ops.ooboffs = 0; 391 ops.oobbuf = (uint8_t *)&n; 392 ops.mode = MTD_OPS_AUTO_OOB; 393 ops.datbuf = NULL; 394 395 if (marker == MTDSWAP_TYPE_CLEAN) { 396 n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN); 397 n.count = cpu_to_le32(eb->erase_count); 398 ops.ooblen = MTDSWAP_OOBSIZE; 399 offset = mtdswap_eb_offset(d, eb); 400 } else { 401 n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY); 402 ops.ooblen = sizeof(n.magic); 403 offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize; 404 } 405 406 ret = mtd_write_oob(d->mtd, offset, &ops); 407 408 if (ret) { 409 dev_warn(d->dev, "Write OOB failed for block at %08llx " 410 "error %d\n", offset, ret); 411 if (ret == -EIO || mtd_is_eccerr(ret)) 412 mtdswap_handle_write_error(d, eb); 413 return ret; 414 } 415 416 if (ops.oobretlen != ops.ooblen) { 417 dev_warn(d->dev, "Short OOB write for block at %08llx: " 418 "%zd not %zd\n", 419 offset, ops.oobretlen, ops.ooblen); 420 return ret; 421 } 422 423 return 0; 424} 425 426/* 427 * Are there any erase blocks without MAGIC_CLEAN header, presumably 428 * because power was cut off after erase but before header write? We 429 * need to guestimate the erase count. 430 */ 431static void mtdswap_check_counts(struct mtdswap_dev *d) 432{ 433 struct rb_root hist_root = RB_ROOT; 434 struct rb_node *medrb; 435 struct swap_eb *eb; 436 unsigned int i, cnt, median; 437 438 cnt = 0; 439 for (i = 0; i < d->eblks; i++) { 440 eb = d->eb_data + i; 441 442 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 443 continue; 444 445 __mtdswap_rb_add(&hist_root, eb); 446 cnt++; 447 } 448 449 if (cnt == 0) 450 return; 451 452 medrb = mtdswap_rb_index(&hist_root, cnt / 2); 453 median = rb_entry(medrb, struct swap_eb, rb)->erase_count; 454 455 d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root); 456 457 for (i = 0; i < d->eblks; i++) { 458 eb = d->eb_data + i; 459 460 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR)) 461 eb->erase_count = median; 462 463 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 464 continue; 465 466 rb_erase(&eb->rb, &hist_root); 467 } 468} 469 470static void mtdswap_scan_eblks(struct mtdswap_dev *d) 471{ 472 int status; 473 unsigned int i, idx; 474 struct swap_eb *eb; 475 476 for (i = 0; i < d->eblks; i++) { 477 eb = d->eb_data + i; 478 479 status = mtdswap_read_markers(d, eb); 480 if (status < 0) 481 eb->flags |= EBLOCK_READERR; 482 else if (status == MTDSWAP_SCANNED_BAD) { 483 eb->flags |= EBLOCK_BAD; 484 continue; 485 } 486 487 switch (status) { 488 case MTDSWAP_SCANNED_CLEAN: 489 idx = MTDSWAP_CLEAN; 490 break; 491 case MTDSWAP_SCANNED_DIRTY: 492 case MTDSWAP_SCANNED_BITFLIP: 493 idx = MTDSWAP_DIRTY; 494 break; 495 default: 496 idx = MTDSWAP_FAILING; 497 } 498 499 eb->flags |= (idx << EBLOCK_IDX_SHIFT); 500 } 501 502 mtdswap_check_counts(d); 503 504 for (i = 0; i < d->eblks; i++) { 505 eb = d->eb_data + i; 506 507 if (eb->flags & EBLOCK_BAD) 508 continue; 509 510 idx = eb->flags >> EBLOCK_IDX_SHIFT; 511 mtdswap_rb_add(d, eb, idx); 512 } 513} 514 515/* 516 * Place eblk into a tree corresponding to its number of active blocks 517 * it contains. 518 */ 519static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb) 520{ 521 unsigned int weight = eb->active_count; 522 unsigned int maxweight = d->pages_per_eblk; 523 524 if (eb == d->curr_write) 525 return; 526 527 if (eb->flags & EBLOCK_BITFLIP) 528 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 529 else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED)) 530 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 531 if (weight == maxweight) 532 mtdswap_rb_add(d, eb, MTDSWAP_USED); 533 else if (weight == 0) 534 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY); 535 else if (weight > (maxweight/2)) 536 mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG); 537 else 538 mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG); 539} 540 541 542static void mtdswap_erase_callback(struct erase_info *done) 543{ 544 wait_queue_head_t *wait_q = (wait_queue_head_t *)done->priv; 545 wake_up(wait_q); 546} 547 548static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb) 549{ 550 struct mtd_info *mtd = d->mtd; 551 struct erase_info erase; 552 wait_queue_head_t wq; 553 unsigned int retries = 0; 554 int ret; 555 556 eb->erase_count++; 557 if (eb->erase_count > d->max_erase_count) 558 d->max_erase_count = eb->erase_count; 559 560retry: 561 init_waitqueue_head(&wq); 562 memset(&erase, 0, sizeof(struct erase_info)); 563 564 erase.mtd = mtd; 565 erase.callback = mtdswap_erase_callback; 566 erase.addr = mtdswap_eb_offset(d, eb); 567 erase.len = mtd->erasesize; 568 erase.priv = (u_long)&wq; 569 570 ret = mtd_erase(mtd, &erase); 571 if (ret) { 572 if (retries++ < MTDSWAP_ERASE_RETRIES) { 573 dev_warn(d->dev, 574 "erase of erase block %#llx on %s failed", 575 erase.addr, mtd->name); 576 yield(); 577 goto retry; 578 } 579 580 dev_err(d->dev, "Cannot erase erase block %#llx on %s\n", 581 erase.addr, mtd->name); 582 583 mtdswap_handle_badblock(d, eb); 584 return -EIO; 585 } 586 587 ret = wait_event_interruptible(wq, erase.state == MTD_ERASE_DONE || 588 erase.state == MTD_ERASE_FAILED); 589 if (ret) { 590 dev_err(d->dev, "Interrupted erase block %#llx erassure on %s", 591 erase.addr, mtd->name); 592 return -EINTR; 593 } 594 595 if (erase.state == MTD_ERASE_FAILED) { 596 if (retries++ < MTDSWAP_ERASE_RETRIES) { 597 dev_warn(d->dev, 598 "erase of erase block %#llx on %s failed", 599 erase.addr, mtd->name); 600 yield(); 601 goto retry; 602 } 603 604 mtdswap_handle_badblock(d, eb); 605 return -EIO; 606 } 607 608 return 0; 609} 610 611static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page, 612 unsigned int *block) 613{ 614 int ret; 615 struct swap_eb *old_eb = d->curr_write; 616 struct rb_root *clean_root; 617 struct swap_eb *eb; 618 619 if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) { 620 do { 621 if (TREE_EMPTY(d, CLEAN)) 622 return -ENOSPC; 623 624 clean_root = TREE_ROOT(d, CLEAN); 625 eb = rb_entry(rb_first(clean_root), struct swap_eb, rb); 626 rb_erase(&eb->rb, clean_root); 627 eb->root = NULL; 628 TREE_COUNT(d, CLEAN)--; 629 630 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY); 631 } while (ret == -EIO || mtd_is_eccerr(ret)); 632 633 if (ret) 634 return ret; 635 636 d->curr_write_pos = 0; 637 d->curr_write = eb; 638 if (old_eb) 639 mtdswap_store_eb(d, old_eb); 640 } 641 642 *block = (d->curr_write - d->eb_data) * d->pages_per_eblk + 643 d->curr_write_pos; 644 645 d->curr_write->active_count++; 646 d->revmap[*block] = page; 647 d->curr_write_pos++; 648 649 return 0; 650} 651 652static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d) 653{ 654 return TREE_COUNT(d, CLEAN) * d->pages_per_eblk + 655 d->pages_per_eblk - d->curr_write_pos; 656} 657 658static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d) 659{ 660 return mtdswap_free_page_cnt(d) > d->pages_per_eblk; 661} 662 663static int mtdswap_write_block(struct mtdswap_dev *d, char *buf, 664 unsigned int page, unsigned int *bp, int gc_context) 665{ 666 struct mtd_info *mtd = d->mtd; 667 struct swap_eb *eb; 668 size_t retlen; 669 loff_t writepos; 670 int ret; 671 672retry: 673 if (!gc_context) 674 while (!mtdswap_enough_free_pages(d)) 675 if (mtdswap_gc(d, 0) > 0) 676 return -ENOSPC; 677 678 ret = mtdswap_map_free_block(d, page, bp); 679 eb = d->eb_data + (*bp / d->pages_per_eblk); 680 681 if (ret == -EIO || mtd_is_eccerr(ret)) { 682 d->curr_write = NULL; 683 eb->active_count--; 684 d->revmap[*bp] = PAGE_UNDEF; 685 goto retry; 686 } 687 688 if (ret < 0) 689 return ret; 690 691 writepos = (loff_t)*bp << PAGE_SHIFT; 692 ret = mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf); 693 if (ret == -EIO || mtd_is_eccerr(ret)) { 694 d->curr_write_pos--; 695 eb->active_count--; 696 d->revmap[*bp] = PAGE_UNDEF; 697 mtdswap_handle_write_error(d, eb); 698 goto retry; 699 } 700 701 if (ret < 0) { 702 dev_err(d->dev, "Write to MTD device failed: %d (%zd written)", 703 ret, retlen); 704 goto err; 705 } 706 707 if (retlen != PAGE_SIZE) { 708 dev_err(d->dev, "Short write to MTD device: %zd written", 709 retlen); 710 ret = -EIO; 711 goto err; 712 } 713 714 return ret; 715 716err: 717 d->curr_write_pos--; 718 eb->active_count--; 719 d->revmap[*bp] = PAGE_UNDEF; 720 721 return ret; 722} 723 724static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock, 725 unsigned int *newblock) 726{ 727 struct mtd_info *mtd = d->mtd; 728 struct swap_eb *eb, *oldeb; 729 int ret; 730 size_t retlen; 731 unsigned int page, retries; 732 loff_t readpos; 733 734 page = d->revmap[oldblock]; 735 readpos = (loff_t) oldblock << PAGE_SHIFT; 736 retries = 0; 737 738retry: 739 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf); 740 741 if (ret < 0 && !mtd_is_bitflip(ret)) { 742 oldeb = d->eb_data + oldblock / d->pages_per_eblk; 743 oldeb->flags |= EBLOCK_READERR; 744 745 dev_err(d->dev, "Read Error: %d (block %u)\n", ret, 746 oldblock); 747 retries++; 748 if (retries < MTDSWAP_IO_RETRIES) 749 goto retry; 750 751 goto read_error; 752 } 753 754 if (retlen != PAGE_SIZE) { 755 dev_err(d->dev, "Short read: %zd (block %u)\n", retlen, 756 oldblock); 757 ret = -EIO; 758 goto read_error; 759 } 760 761 ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1); 762 if (ret < 0) { 763 d->page_data[page] = BLOCK_ERROR; 764 dev_err(d->dev, "Write error: %d\n", ret); 765 return ret; 766 } 767 768 eb = d->eb_data + *newblock / d->pages_per_eblk; 769 d->page_data[page] = *newblock; 770 d->revmap[oldblock] = PAGE_UNDEF; 771 eb = d->eb_data + oldblock / d->pages_per_eblk; 772 eb->active_count--; 773 774 return 0; 775 776read_error: 777 d->page_data[page] = BLOCK_ERROR; 778 d->revmap[oldblock] = PAGE_UNDEF; 779 return ret; 780} 781 782static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb) 783{ 784 unsigned int i, block, eblk_base, newblock; 785 int ret, errcode; 786 787 errcode = 0; 788 eblk_base = (eb - d->eb_data) * d->pages_per_eblk; 789 790 for (i = 0; i < d->pages_per_eblk; i++) { 791 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 792 return -ENOSPC; 793 794 block = eblk_base + i; 795 if (d->revmap[block] == PAGE_UNDEF) 796 continue; 797 798 ret = mtdswap_move_block(d, block, &newblock); 799 if (ret < 0 && !errcode) 800 errcode = ret; 801 } 802 803 return errcode; 804} 805 806static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d) 807{ 808 int idx, stopat; 809 810 if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_TRESHOLD) 811 stopat = MTDSWAP_LOWFRAG; 812 else 813 stopat = MTDSWAP_HIFRAG; 814 815 for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--) 816 if (d->trees[idx].root.rb_node != NULL) 817 return idx; 818 819 return -1; 820} 821 822static int mtdswap_wlfreq(unsigned int maxdiff) 823{ 824 unsigned int h, x, y, dist, base; 825 826 /* 827 * Calculate linear ramp down from f1 to f2 when maxdiff goes from 828 * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE. Similar 829 * to triangle with height f1 - f1 and width COLLECT_NONDIRTY_BASE. 830 */ 831 832 dist = maxdiff - MAX_ERASE_DIFF; 833 if (dist > COLLECT_NONDIRTY_BASE) 834 dist = COLLECT_NONDIRTY_BASE; 835 836 /* 837 * Modelling the slop as right angular triangle with base 838 * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is 839 * equal to the ratio h/base. 840 */ 841 h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2; 842 base = COLLECT_NONDIRTY_BASE; 843 844 x = dist - base; 845 y = (x * h + base / 2) / base; 846 847 return COLLECT_NONDIRTY_FREQ2 + y; 848} 849 850static int mtdswap_choose_wl_tree(struct mtdswap_dev *d) 851{ 852 static unsigned int pick_cnt; 853 unsigned int i, idx = -1, wear, max; 854 struct rb_root *root; 855 856 max = 0; 857 for (i = 0; i <= MTDSWAP_DIRTY; i++) { 858 root = &d->trees[i].root; 859 if (root->rb_node == NULL) 860 continue; 861 862 wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root); 863 if (wear > max) { 864 max = wear; 865 idx = i; 866 } 867 } 868 869 if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) { 870 pick_cnt = 0; 871 return idx; 872 } 873 874 pick_cnt++; 875 return -1; 876} 877 878static int mtdswap_choose_gc_tree(struct mtdswap_dev *d, 879 unsigned int background) 880{ 881 int idx; 882 883 if (TREE_NONEMPTY(d, FAILING) && 884 (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY)))) 885 return MTDSWAP_FAILING; 886 887 idx = mtdswap_choose_wl_tree(d); 888 if (idx >= MTDSWAP_CLEAN) 889 return idx; 890 891 return __mtdswap_choose_gc_tree(d); 892} 893 894static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d, 895 unsigned int background) 896{ 897 struct rb_root *rp = NULL; 898 struct swap_eb *eb = NULL; 899 int idx; 900 901 if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD && 902 TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING)) 903 return NULL; 904 905 idx = mtdswap_choose_gc_tree(d, background); 906 if (idx < 0) 907 return NULL; 908 909 rp = &d->trees[idx].root; 910 eb = rb_entry(rb_first(rp), struct swap_eb, rb); 911 912 rb_erase(&eb->rb, rp); 913 eb->root = NULL; 914 d->trees[idx].count--; 915 return eb; 916} 917 918static unsigned int mtdswap_test_patt(unsigned int i) 919{ 920 return i % 2 ? 0x55555555 : 0xAAAAAAAA; 921} 922 923static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d, 924 struct swap_eb *eb) 925{ 926 struct mtd_info *mtd = d->mtd; 927 unsigned int test, i, j, patt, mtd_pages; 928 loff_t base, pos; 929 unsigned int *p1 = (unsigned int *)d->page_buf; 930 unsigned char *p2 = (unsigned char *)d->oob_buf; 931 struct mtd_oob_ops ops; 932 int ret; 933 934 ops.mode = MTD_OPS_AUTO_OOB; 935 ops.len = mtd->writesize; 936 ops.ooblen = mtd->ecclayout->oobavail; 937 ops.ooboffs = 0; 938 ops.datbuf = d->page_buf; 939 ops.oobbuf = d->oob_buf; 940 base = mtdswap_eb_offset(d, eb); 941 mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize; 942 943 for (test = 0; test < 2; test++) { 944 pos = base; 945 for (i = 0; i < mtd_pages; i++) { 946 patt = mtdswap_test_patt(test + i); 947 memset(d->page_buf, patt, mtd->writesize); 948 memset(d->oob_buf, patt, mtd->ecclayout->oobavail); 949 ret = mtd_write_oob(mtd, pos, &ops); 950 if (ret) 951 goto error; 952 953 pos += mtd->writesize; 954 } 955 956 pos = base; 957 for (i = 0; i < mtd_pages; i++) { 958 ret = mtd_read_oob(mtd, pos, &ops); 959 if (ret) 960 goto error; 961 962 patt = mtdswap_test_patt(test + i); 963 for (j = 0; j < mtd->writesize/sizeof(int); j++) 964 if (p1[j] != patt) 965 goto error; 966 967 for (j = 0; j < mtd->ecclayout->oobavail; j++) 968 if (p2[j] != (unsigned char)patt) 969 goto error; 970 971 pos += mtd->writesize; 972 } 973 974 ret = mtdswap_erase_block(d, eb); 975 if (ret) 976 goto error; 977 } 978 979 eb->flags &= ~EBLOCK_READERR; 980 return 1; 981 982error: 983 mtdswap_handle_badblock(d, eb); 984 return 0; 985} 986 987static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background) 988{ 989 struct swap_eb *eb; 990 int ret; 991 992 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 993 return 1; 994 995 eb = mtdswap_pick_gc_eblk(d, background); 996 if (!eb) 997 return 1; 998 999 ret = mtdswap_gc_eblock(d, eb); 1000 if (ret == -ENOSPC) 1001 return 1; 1002 1003 if (eb->flags & EBLOCK_FAILED) { 1004 mtdswap_handle_badblock(d, eb); 1005 return 0; 1006 } 1007 1008 eb->flags &= ~EBLOCK_BITFLIP; 1009 ret = mtdswap_erase_block(d, eb); 1010 if ((eb->flags & EBLOCK_READERR) && 1011 (ret || !mtdswap_eblk_passes(d, eb))) 1012 return 0; 1013 1014 if (ret == 0) 1015 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN); 1016 1017 if (ret == 0) 1018 mtdswap_rb_add(d, eb, MTDSWAP_CLEAN); 1019 else if (ret != -EIO && !mtd_is_eccerr(ret)) 1020 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY); 1021 1022 return 0; 1023} 1024 1025static void mtdswap_background(struct mtd_blktrans_dev *dev) 1026{ 1027 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1028 int ret; 1029 1030 while (1) { 1031 ret = mtdswap_gc(d, 1); 1032 if (ret || mtd_blktrans_cease_background(dev)) 1033 return; 1034 } 1035} 1036 1037static void mtdswap_cleanup(struct mtdswap_dev *d) 1038{ 1039 vfree(d->eb_data); 1040 vfree(d->revmap); 1041 vfree(d->page_data); 1042 kfree(d->oob_buf); 1043 kfree(d->page_buf); 1044} 1045 1046static int mtdswap_flush(struct mtd_blktrans_dev *dev) 1047{ 1048 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1049 1050 mtd_sync(d->mtd); 1051 return 0; 1052} 1053 1054static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size) 1055{ 1056 loff_t offset; 1057 unsigned int badcnt; 1058 1059 badcnt = 0; 1060 1061 if (mtd_can_have_bb(mtd)) 1062 for (offset = 0; offset < size; offset += mtd->erasesize) 1063 if (mtd_block_isbad(mtd, offset)) 1064 badcnt++; 1065 1066 return badcnt; 1067} 1068 1069static int mtdswap_writesect(struct mtd_blktrans_dev *dev, 1070 unsigned long page, char *buf) 1071{ 1072 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1073 unsigned int newblock, mapped; 1074 struct swap_eb *eb; 1075 int ret; 1076 1077 d->sect_write_count++; 1078 1079 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 1080 return -ENOSPC; 1081 1082 if (header) { 1083 /* Ignore writes to the header page */ 1084 if (unlikely(page == 0)) 1085 return 0; 1086 1087 page--; 1088 } 1089 1090 mapped = d->page_data[page]; 1091 if (mapped <= BLOCK_MAX) { 1092 eb = d->eb_data + (mapped / d->pages_per_eblk); 1093 eb->active_count--; 1094 mtdswap_store_eb(d, eb); 1095 d->page_data[page] = BLOCK_UNDEF; 1096 d->revmap[mapped] = PAGE_UNDEF; 1097 } 1098 1099 ret = mtdswap_write_block(d, buf, page, &newblock, 0); 1100 d->mtd_write_count++; 1101 1102 if (ret < 0) 1103 return ret; 1104 1105 eb = d->eb_data + (newblock / d->pages_per_eblk); 1106 d->page_data[page] = newblock; 1107 1108 return 0; 1109} 1110 1111/* Provide a dummy swap header for the kernel */ 1112static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf) 1113{ 1114 union swap_header *hd = (union swap_header *)(buf); 1115 1116 memset(buf, 0, PAGE_SIZE - 10); 1117 1118 hd->info.version = 1; 1119 hd->info.last_page = d->mbd_dev->size - 1; 1120 hd->info.nr_badpages = 0; 1121 1122 memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10); 1123 1124 return 0; 1125} 1126 1127static int mtdswap_readsect(struct mtd_blktrans_dev *dev, 1128 unsigned long page, char *buf) 1129{ 1130 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1131 struct mtd_info *mtd = d->mtd; 1132 unsigned int realblock, retries; 1133 loff_t readpos; 1134 struct swap_eb *eb; 1135 size_t retlen; 1136 int ret; 1137 1138 d->sect_read_count++; 1139 1140 if (header) { 1141 if (unlikely(page == 0)) 1142 return mtdswap_auto_header(d, buf); 1143 1144 page--; 1145 } 1146 1147 realblock = d->page_data[page]; 1148 if (realblock > BLOCK_MAX) { 1149 memset(buf, 0x0, PAGE_SIZE); 1150 if (realblock == BLOCK_UNDEF) 1151 return 0; 1152 else 1153 return -EIO; 1154 } 1155 1156 eb = d->eb_data + (realblock / d->pages_per_eblk); 1157 BUG_ON(d->revmap[realblock] == PAGE_UNDEF); 1158 1159 readpos = (loff_t)realblock << PAGE_SHIFT; 1160 retries = 0; 1161 1162retry: 1163 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf); 1164 1165 d->mtd_read_count++; 1166 if (mtd_is_bitflip(ret)) { 1167 eb->flags |= EBLOCK_BITFLIP; 1168 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 1169 ret = 0; 1170 } 1171 1172 if (ret < 0) { 1173 dev_err(d->dev, "Read error %d\n", ret); 1174 eb->flags |= EBLOCK_READERR; 1175 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 1176 retries++; 1177 if (retries < MTDSWAP_IO_RETRIES) 1178 goto retry; 1179 1180 return ret; 1181 } 1182 1183 if (retlen != PAGE_SIZE) { 1184 dev_err(d->dev, "Short read %zd\n", retlen); 1185 return -EIO; 1186 } 1187 1188 return 0; 1189} 1190 1191static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first, 1192 unsigned nr_pages) 1193{ 1194 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1195 unsigned long page; 1196 struct swap_eb *eb; 1197 unsigned int mapped; 1198 1199 d->discard_count++; 1200 1201 for (page = first; page < first + nr_pages; page++) { 1202 mapped = d->page_data[page]; 1203 if (mapped <= BLOCK_MAX) { 1204 eb = d->eb_data + (mapped / d->pages_per_eblk); 1205 eb->active_count--; 1206 mtdswap_store_eb(d, eb); 1207 d->page_data[page] = BLOCK_UNDEF; 1208 d->revmap[mapped] = PAGE_UNDEF; 1209 d->discard_page_count++; 1210 } else if (mapped == BLOCK_ERROR) { 1211 d->page_data[page] = BLOCK_UNDEF; 1212 d->discard_page_count++; 1213 } 1214 } 1215 1216 return 0; 1217} 1218 1219static int mtdswap_show(struct seq_file *s, void *data) 1220{ 1221 struct mtdswap_dev *d = (struct mtdswap_dev *) s->private; 1222 unsigned long sum; 1223 unsigned int count[MTDSWAP_TREE_CNT]; 1224 unsigned int min[MTDSWAP_TREE_CNT]; 1225 unsigned int max[MTDSWAP_TREE_CNT]; 1226 unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages; 1227 uint64_t use_size; 1228 char *name[] = {"clean", "used", "low", "high", "dirty", "bitflip", 1229 "failing"}; 1230 1231 mutex_lock(&d->mbd_dev->lock); 1232 1233 for (i = 0; i < MTDSWAP_TREE_CNT; i++) { 1234 struct rb_root *root = &d->trees[i].root; 1235 1236 if (root->rb_node) { 1237 count[i] = d->trees[i].count; 1238 min[i] = rb_entry(rb_first(root), struct swap_eb, 1239 rb)->erase_count; 1240 max[i] = rb_entry(rb_last(root), struct swap_eb, 1241 rb)->erase_count; 1242 } else 1243 count[i] = 0; 1244 } 1245 1246 if (d->curr_write) { 1247 cw = 1; 1248 cwp = d->curr_write_pos; 1249 cwecount = d->curr_write->erase_count; 1250 } 1251 1252 sum = 0; 1253 for (i = 0; i < d->eblks; i++) 1254 sum += d->eb_data[i].erase_count; 1255 1256 use_size = (uint64_t)d->eblks * d->mtd->erasesize; 1257 bb_cnt = mtdswap_badblocks(d->mtd, use_size); 1258 1259 mapped = 0; 1260 pages = d->mbd_dev->size; 1261 for (i = 0; i < pages; i++) 1262 if (d->page_data[i] != BLOCK_UNDEF) 1263 mapped++; 1264 1265 mutex_unlock(&d->mbd_dev->lock); 1266 1267 for (i = 0; i < MTDSWAP_TREE_CNT; i++) { 1268 if (!count[i]) 1269 continue; 1270 1271 if (min[i] != max[i]) 1272 seq_printf(s, "%s:\t%5d erase blocks, erased min %d, " 1273 "max %d times\n", 1274 name[i], count[i], min[i], max[i]); 1275 else 1276 seq_printf(s, "%s:\t%5d erase blocks, all erased %d " 1277 "times\n", name[i], count[i], min[i]); 1278 } 1279 1280 if (bb_cnt) 1281 seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt); 1282 1283 if (cw) 1284 seq_printf(s, "current erase block: %u pages used, %u free, " 1285 "erased %u times\n", 1286 cwp, d->pages_per_eblk - cwp, cwecount); 1287 1288 seq_printf(s, "total erasures: %lu\n", sum); 1289 1290 seq_puts(s, "\n"); 1291 1292 seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count); 1293 seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count); 1294 seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count); 1295 seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count); 1296 seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count); 1297 seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count); 1298 1299 seq_puts(s, "\n"); 1300 seq_printf(s, "total pages: %u\n", pages); 1301 seq_printf(s, "pages mapped: %u\n", mapped); 1302 1303 return 0; 1304} 1305 1306static int mtdswap_open(struct inode *inode, struct file *file) 1307{ 1308 return single_open(file, mtdswap_show, inode->i_private); 1309} 1310 1311static const struct file_operations mtdswap_fops = { 1312 .open = mtdswap_open, 1313 .read = seq_read, 1314 .llseek = seq_lseek, 1315 .release = single_release, 1316}; 1317 1318static int mtdswap_add_debugfs(struct mtdswap_dev *d) 1319{ 1320 struct gendisk *gd = d->mbd_dev->disk; 1321 struct device *dev = disk_to_dev(gd); 1322 1323 struct dentry *root; 1324 struct dentry *dent; 1325 1326 root = debugfs_create_dir(gd->disk_name, NULL); 1327 if (IS_ERR(root)) 1328 return 0; 1329 1330 if (!root) { 1331 dev_err(dev, "failed to initialize debugfs\n"); 1332 return -1; 1333 } 1334 1335 d->debugfs_root = root; 1336 1337 dent = debugfs_create_file("stats", S_IRUSR, root, d, 1338 &mtdswap_fops); 1339 if (!dent) { 1340 dev_err(d->dev, "debugfs_create_file failed\n"); 1341 debugfs_remove_recursive(root); 1342 d->debugfs_root = NULL; 1343 return -1; 1344 } 1345 1346 return 0; 1347} 1348 1349static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks, 1350 unsigned int spare_cnt) 1351{ 1352 struct mtd_info *mtd = d->mbd_dev->mtd; 1353 unsigned int i, eblk_bytes, pages, blocks; 1354 int ret = -ENOMEM; 1355 1356 d->mtd = mtd; 1357 d->eblks = eblocks; 1358 d->spare_eblks = spare_cnt; 1359 d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT; 1360 1361 pages = d->mbd_dev->size; 1362 blocks = eblocks * d->pages_per_eblk; 1363 1364 for (i = 0; i < MTDSWAP_TREE_CNT; i++) 1365 d->trees[i].root = RB_ROOT; 1366 1367 d->page_data = vmalloc(sizeof(int)*pages); 1368 if (!d->page_data) 1369 goto page_data_fail; 1370 1371 d->revmap = vmalloc(sizeof(int)*blocks); 1372 if (!d->revmap) 1373 goto revmap_fail; 1374 1375 eblk_bytes = sizeof(struct swap_eb)*d->eblks; 1376 d->eb_data = vzalloc(eblk_bytes); 1377 if (!d->eb_data) 1378 goto eb_data_fail; 1379 1380 for (i = 0; i < pages; i++) 1381 d->page_data[i] = BLOCK_UNDEF; 1382 1383 for (i = 0; i < blocks; i++) 1384 d->revmap[i] = PAGE_UNDEF; 1385 1386 d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 1387 if (!d->page_buf) 1388 goto page_buf_fail; 1389 1390 d->oob_buf = kmalloc(2 * mtd->ecclayout->oobavail, GFP_KERNEL); 1391 if (!d->oob_buf) 1392 goto oob_buf_fail; 1393 1394 mtdswap_scan_eblks(d); 1395 1396 return 0; 1397 1398oob_buf_fail: 1399 kfree(d->page_buf); 1400page_buf_fail: 1401 vfree(d->eb_data); 1402eb_data_fail: 1403 vfree(d->revmap); 1404revmap_fail: 1405 vfree(d->page_data); 1406page_data_fail: 1407 printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret); 1408 return ret; 1409} 1410 1411static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) 1412{ 1413 struct mtdswap_dev *d; 1414 struct mtd_blktrans_dev *mbd_dev; 1415 char *parts; 1416 char *this_opt; 1417 unsigned long part; 1418 unsigned int eblocks, eavailable, bad_blocks, spare_cnt; 1419 uint64_t swap_size, use_size, size_limit; 1420 struct nand_ecclayout *oinfo; 1421 int ret; 1422 1423 parts = &partitions[0]; 1424 if (!*parts) 1425 return; 1426 1427 while ((this_opt = strsep(&parts, ",")) != NULL) { 1428 if (kstrtoul(this_opt, 0, &part) < 0) 1429 return; 1430 1431 if (mtd->index == part) 1432 break; 1433 } 1434 1435 if (mtd->index != part) 1436 return; 1437 1438 if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) { 1439 printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE " 1440 "%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE); 1441 return; 1442 } 1443 1444 if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) { 1445 printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size" 1446 " %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize); 1447 return; 1448 } 1449 1450 oinfo = mtd->ecclayout; 1451 if (!oinfo) { 1452 printk(KERN_ERR "%s: mtd%d does not have OOB\n", 1453 MTDSWAP_PREFIX, mtd->index); 1454 return; 1455 } 1456 1457 if (!mtd->oobsize || oinfo->oobavail < MTDSWAP_OOBSIZE) { 1458 printk(KERN_ERR "%s: Not enough free bytes in OOB, " 1459 "%d available, %zu needed.\n", 1460 MTDSWAP_PREFIX, oinfo->oobavail, MTDSWAP_OOBSIZE); 1461 return; 1462 } 1463 1464 if (spare_eblocks > 100) 1465 spare_eblocks = 100; 1466 1467 use_size = mtd->size; 1468 size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE; 1469 1470 if (mtd->size > size_limit) { 1471 printk(KERN_WARNING "%s: Device too large. Limiting size to " 1472 "%llu bytes\n", MTDSWAP_PREFIX, size_limit); 1473 use_size = size_limit; 1474 } 1475 1476 eblocks = mtd_div_by_eb(use_size, mtd); 1477 use_size = (uint64_t)eblocks * mtd->erasesize; 1478 bad_blocks = mtdswap_badblocks(mtd, use_size); 1479 eavailable = eblocks - bad_blocks; 1480 1481 if (eavailable < MIN_ERASE_BLOCKS) { 1482 printk(KERN_ERR "%s: Not enough erase blocks. %u available, " 1483 "%d needed\n", MTDSWAP_PREFIX, eavailable, 1484 MIN_ERASE_BLOCKS); 1485 return; 1486 } 1487 1488 spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100); 1489 1490 if (spare_cnt < MIN_SPARE_EBLOCKS) 1491 spare_cnt = MIN_SPARE_EBLOCKS; 1492 1493 if (spare_cnt > eavailable - 1) 1494 spare_cnt = eavailable - 1; 1495 1496 swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize + 1497 (header ? PAGE_SIZE : 0); 1498 1499 printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, " 1500 "%u spare, %u bad blocks\n", 1501 MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks); 1502 1503 d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL); 1504 if (!d) 1505 return; 1506 1507 mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL); 1508 if (!mbd_dev) { 1509 kfree(d); 1510 return; 1511 } 1512 1513 d->mbd_dev = mbd_dev; 1514 mbd_dev->priv = d; 1515 1516 mbd_dev->mtd = mtd; 1517 mbd_dev->devnum = mtd->index; 1518 mbd_dev->size = swap_size >> PAGE_SHIFT; 1519 mbd_dev->tr = tr; 1520 1521 if (!(mtd->flags & MTD_WRITEABLE)) 1522 mbd_dev->readonly = 1; 1523 1524 if (mtdswap_init(d, eblocks, spare_cnt) < 0) 1525 goto init_failed; 1526 1527 if (add_mtd_blktrans_dev(mbd_dev) < 0) 1528 goto cleanup; 1529 1530 d->dev = disk_to_dev(mbd_dev->disk); 1531 1532 ret = mtdswap_add_debugfs(d); 1533 if (ret < 0) 1534 goto debugfs_failed; 1535 1536 return; 1537 1538debugfs_failed: 1539 del_mtd_blktrans_dev(mbd_dev); 1540 1541cleanup: 1542 mtdswap_cleanup(d); 1543 1544init_failed: 1545 kfree(mbd_dev); 1546 kfree(d); 1547} 1548 1549static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev) 1550{ 1551 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1552 1553 debugfs_remove_recursive(d->debugfs_root); 1554 del_mtd_blktrans_dev(dev); 1555 mtdswap_cleanup(d); 1556 kfree(d); 1557} 1558 1559static struct mtd_blktrans_ops mtdswap_ops = { 1560 .name = "mtdswap", 1561 .major = 0, 1562 .part_bits = 0, 1563 .blksize = PAGE_SIZE, 1564 .flush = mtdswap_flush, 1565 .readsect = mtdswap_readsect, 1566 .writesect = mtdswap_writesect, 1567 .discard = mtdswap_discard, 1568 .background = mtdswap_background, 1569 .add_mtd = mtdswap_add_mtd, 1570 .remove_dev = mtdswap_remove_dev, 1571 .owner = THIS_MODULE, 1572}; 1573 1574static int __init mtdswap_modinit(void) 1575{ 1576 return register_mtd_blktrans(&mtdswap_ops); 1577} 1578 1579static void __exit mtdswap_modexit(void) 1580{ 1581 deregister_mtd_blktrans(&mtdswap_ops); 1582} 1583 1584module_init(mtdswap_modinit); 1585module_exit(mtdswap_modexit); 1586 1587 1588MODULE_LICENSE("GPL"); 1589MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>"); 1590MODULE_DESCRIPTION("Block device access to an MTD suitable for using as " 1591 "swap space"); 1592