drivers/mtd/mtdswap.c

DEFINITIONS

This source file includes the following definitions.
  1. mtdswap_eb_offset
  2. mtdswap_eb_detach
  3. __mtdswap_rb_add
  4. mtdswap_rb_add
  5. mtdswap_rb_index
  6. mtdswap_handle_badblock
  7. mtdswap_handle_write_error
  8. mtdswap_read_oob
  9. mtdswap_read_markers
  10. mtdswap_write_marker
  11. mtdswap_check_counts
  12. mtdswap_scan_eblks
  13. mtdswap_store_eb
  14. mtdswap_erase_block
  15. mtdswap_map_free_block
  16. mtdswap_free_page_cnt
  17. mtdswap_enough_free_pages
  18. mtdswap_write_block
  19. mtdswap_move_block
  20. mtdswap_gc_eblock
  21. __mtdswap_choose_gc_tree
  22. mtdswap_wlfreq
  23. mtdswap_choose_wl_tree
  24. mtdswap_choose_gc_tree
  25. mtdswap_pick_gc_eblk
  26. mtdswap_test_patt
  27. mtdswap_eblk_passes
  28. mtdswap_gc
  29. mtdswap_background
  30. mtdswap_cleanup
  31. mtdswap_flush
  32. mtdswap_badblocks
  33. mtdswap_writesect
  34. mtdswap_auto_header
  35. mtdswap_readsect
  36. mtdswap_discard
  37. mtdswap_show
  38. mtdswap_add_debugfs
  39. mtdswap_init
  40. mtdswap_add_mtd
  41. mtdswap_remove_dev
  42. mtdswap_modinit
  43. mtdswap_modexit

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Swap block device support for MTDs
 * Turns an MTD device into a swap device with block wear leveling
 *
 * Copyright © 2007,2011 Nokia Corporation. All rights reserved.
 *
 * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com>
 *
 * Based on Richard Purdie's earlier implementation in 2007. Background
 * support and lock-less operation written by Adrian Hunter.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/blktrans.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/genhd.h>
#include <linux/swap.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/device.h>
#include <linux/math64.h>

#define MTDSWAP_PREFIX "mtdswap"

/*
 * The number of free eraseblocks when GC should stop
 */
#define CLEAN_BLOCK_THRESHOLD   20

/*
 * Number of free eraseblocks below which GC can also collect low frag
 * blocks.
 */
#define LOW_FRAG_GC_THRESHOLD   5

/*
 * Wear level cost amortization. We want to do wear leveling in the
 * background without disturbing GC too much. This is done by defining
 * a maximum GC frequency: a frequency value of 6 means that 1/6 of the
 * GC passes will pick an erase block based on the biggest wear
 * difference rather than the biggest dirtiness.
 *
 * The lower frequency freq2 should be chosen so that the maximum erase
 * difference keeps decreasing even if a malicious application
 * deliberately tries to make erase differences large.
 */
#define MAX_ERASE_DIFF          4000
#define COLLECT_NONDIRTY_BASE   MAX_ERASE_DIFF
#define COLLECT_NONDIRTY_FREQ1  6
#define COLLECT_NONDIRTY_FREQ2  4

#define PAGE_UNDEF              UINT_MAX
#define BLOCK_UNDEF             UINT_MAX
#define BLOCK_ERROR             (UINT_MAX - 1)
#define BLOCK_MAX               (UINT_MAX - 2)

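/*
 * page_data[] maps a swap page number to the page-sized flash block
 * that currently holds it, and revmap[] is the inverse map from flash
 * block back to swap page. BLOCK_UNDEF/PAGE_UNDEF mark unmapped
 * entries; BLOCK_ERROR marks a page lost to an I/O error.
 */
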
#define EBLOCK_BAD              (1 << 0)
#define EBLOCK_NOMAGIC          (1 << 1)
#define EBLOCK_BITFLIP          (1 << 2)
#define EBLOCK_FAILED           (1 << 3)
#define EBLOCK_READERR          (1 << 4)
#define EBLOCK_IDX_SHIFT        5

struct swap_eb {
        struct rb_node rb;
        struct rb_root *root;

        unsigned int flags;
        unsigned int active_count;
        unsigned int erase_count;
        unsigned int pad;               /* speeds up pointer decrement */
};

#define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \
                                rb)->erase_count)
#define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \
                                rb)->erase_count)

struct mtdswap_tree {
        struct rb_root root;
        unsigned int count;
};

enum {
        MTDSWAP_CLEAN,
        MTDSWAP_USED,
        MTDSWAP_LOWFRAG,
        MTDSWAP_HIFRAG,
        MTDSWAP_DIRTY,
        MTDSWAP_BITFLIP,
        MTDSWAP_FAILING,
        MTDSWAP_TREE_CNT,
};
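
/*
 * Erase blocks live in one rb-tree per state, sorted by erase count:
 * CLEAN blocks are erased and marked, USED blocks are fully mapped,
 * LOWFRAG/HIFRAG blocks hold a mix of active and stale pages (see
 * mtdswap_store_eb()), DIRTY blocks hold no active pages, and
 * BITFLIP/FAILING blocks are queued for scrubbing or retirement.
 */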

struct mtdswap_dev {
        struct mtd_blktrans_dev *mbd_dev;
        struct mtd_info *mtd;
        struct device *dev;

        unsigned int *page_data;
        unsigned int *revmap;

        unsigned int eblks;
        unsigned int spare_eblks;
        unsigned int pages_per_eblk;
        unsigned int max_erase_count;
        struct swap_eb *eb_data;

        struct mtdswap_tree trees[MTDSWAP_TREE_CNT];

        unsigned long long sect_read_count;
        unsigned long long sect_write_count;
        unsigned long long mtd_write_count;
        unsigned long long mtd_read_count;
        unsigned long long discard_count;
        unsigned long long discard_page_count;

        unsigned int curr_write_pos;
        struct swap_eb *curr_write;

        char *page_buf;
        char *oob_buf;
};

struct mtdswap_oobdata {
        __le16 magic;
        __le32 count;
} __packed;

#define MTDSWAP_MAGIC_CLEAN     0x2095
#define MTDSWAP_MAGIC_DIRTY     (MTDSWAP_MAGIC_CLEAN + 1)
#define MTDSWAP_TYPE_CLEAN      0
#define MTDSWAP_TYPE_DIRTY      1
#define MTDSWAP_OOBSIZE         sizeof(struct mtdswap_oobdata)

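/*
 * On-flash marker layout, as written by mtdswap_write_marker(): the
 * first page's OOB area carries MTDSWAP_MAGIC_CLEAN plus the 32-bit
 * erase count (written right after a successful erase), and the second
 * page's OOB area carries MTDSWAP_MAGIC_DIRTY alone (written when the
 * block is taken into use).
 */
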
#define MTDSWAP_ERASE_RETRIES   3 /* Before marking erase block bad */
#define MTDSWAP_IO_RETRIES      3

enum {
        MTDSWAP_SCANNED_CLEAN,
        MTDSWAP_SCANNED_DIRTY,
        MTDSWAP_SCANNED_BITFLIP,
        MTDSWAP_SCANNED_BAD,
};

/*
 * In the worst case mtdswap_writesect() has allocated the last clean
 * page from the current block and is then pre-empted by the GC
 * thread. The thread can consume a full erase block when moving a
 * block.
 */
#define MIN_SPARE_EBLOCKS       2
#define MIN_ERASE_BLOCKS        (MIN_SPARE_EBLOCKS + 1)
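
/*
 * Example with illustrative geometry: a 256 KiB eraseblock and 4 KiB
 * pages give 64 pages per block, so a pre-empting GC move can dirty up
 * to 64 fresh pages while the writer still holds the last clean one;
 * two spare eraseblocks cover that worst case.
 */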

#define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root)
#define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL)
#define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name))
#define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count)

#define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv)

static char partitions[128] = "";
module_param_string(partitions, partitions, sizeof(partitions), 0444);
MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap "
                "partitions, e.g. partitions=\"1,3,5\"");

static unsigned int spare_eblocks = 10;
module_param(spare_eblocks, uint, 0444);
MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for "
                "garbage collection (default 10%)");

static bool header; /* false */
module_param(header, bool, 0444);
MODULE_PARM_DESC(header,
                "Include builtin swap header (default 0, without header)");

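/*
 * Example usage (illustrative; the device node name assumes the
 * blktrans layer registers the disk as mtdswapN for MTD partition N):
 *
 *   # modprobe mtdswap partitions=5 spare_eblocks=10
 *   # mkswap /dev/mtdswap5
 *   # swapon /dev/mtdswap5
 *
 * With header=1 the module synthesizes the swap signature itself (see
 * mtdswap_auto_header()) and the mkswap step is unnecessary.
 */
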
static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background);

static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb)
{
        return (loff_t)(eb - d->eb_data) * d->mtd->erasesize;
}

static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb)
{
        unsigned int oldidx;
        struct mtdswap_tree *tp;

        if (eb->root) {
                tp = container_of(eb->root, struct mtdswap_tree, root);
                oldidx = tp - &d->trees[0];

                d->trees[oldidx].count--;
                rb_erase(&eb->rb, eb->root);
        }
}

static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb)
{
        struct rb_node **p, *parent = NULL;
        struct swap_eb *cur;

        p = &root->rb_node;
        while (*p) {
                parent = *p;
                cur = rb_entry(parent, struct swap_eb, rb);
                if (eb->erase_count > cur->erase_count)
                        p = &(*p)->rb_right;
                else
                        p = &(*p)->rb_left;
        }

        rb_link_node(&eb->rb, parent, p);
        rb_insert_color(&eb->rb, root);
}

static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx)
{
        struct rb_root *root;

        if (eb->root == &d->trees[idx].root)
                return;

        mtdswap_eb_detach(d, eb);
        root = &d->trees[idx].root;
        __mtdswap_rb_add(root, eb);
        eb->root = root;
        d->trees[idx].count++;
}

static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx)
{
        struct rb_node *p;
        unsigned int i;

        p = rb_first(root);
        i = 0;
        while (i < idx && p) {
                p = rb_next(p);
                i++;
        }

        return p;
}

static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
{
        int ret;
        loff_t offset;

        d->spare_eblks--;
        eb->flags |= EBLOCK_BAD;
        mtdswap_eb_detach(d, eb);
        eb->root = NULL;

        /* badblocks not supported */
        if (!mtd_can_have_bb(d->mtd))
                return 1;

        offset = mtdswap_eb_offset(d, eb);
        dev_warn(d->dev, "Marking bad block at %08llx\n", offset);
        ret = mtd_block_markbad(d->mtd, offset);

        if (ret) {
                dev_warn(d->dev, "Mark block bad failed for block at %08llx "
                        "error %d\n", offset, ret);
                return ret;
        }

        return 1;
}

static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb)
{
        unsigned int marked = eb->flags & EBLOCK_FAILED;
        struct swap_eb *curr_write = d->curr_write;

        eb->flags |= EBLOCK_FAILED;
        if (curr_write == eb) {
                d->curr_write = NULL;

                if (!marked && d->curr_write_pos != 0) {
                        mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
                        return 0;
                }
        }

        return mtdswap_handle_badblock(d, eb);
}

static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from,
                        struct mtd_oob_ops *ops)
{
        int ret = mtd_read_oob(d->mtd, from, ops);

        if (mtd_is_bitflip(ret))
                return ret;

        if (ret) {
                dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n",
                        ret, from);
                return ret;
        }

        if (ops->oobretlen < ops->ooblen) {
                dev_warn(d->dev, "Read OOB returned a short read (%zd bytes, "
                        "expected %zd) for block at %08llx\n",
                        ops->oobretlen, ops->ooblen, from);
                return -EIO;
        }

        return 0;
}

static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
{
        struct mtdswap_oobdata *data, *data2;
        int ret;
        loff_t offset;
        struct mtd_oob_ops ops;

        offset = mtdswap_eb_offset(d, eb);

        /* Check first if the block is bad. */
        if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset))
                return MTDSWAP_SCANNED_BAD;

        ops.ooblen = 2 * d->mtd->oobavail;
        ops.oobbuf = d->oob_buf;
        ops.ooboffs = 0;
        ops.datbuf = NULL;
        ops.mode = MTD_OPS_AUTO_OOB;

        ret = mtdswap_read_oob(d, offset, &ops);

        if (ret && !mtd_is_bitflip(ret))
                return ret;

        data = (struct mtdswap_oobdata *)d->oob_buf;
        data2 = (struct mtdswap_oobdata *)
                (d->oob_buf + d->mtd->oobavail);

        if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) {
                eb->erase_count = le32_to_cpu(data->count);
                if (mtd_is_bitflip(ret))
                        ret = MTDSWAP_SCANNED_BITFLIP;
                else {
                        if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY)
                                ret = MTDSWAP_SCANNED_DIRTY;
                        else
                                ret = MTDSWAP_SCANNED_CLEAN;
                }
        } else {
                eb->flags |= EBLOCK_NOMAGIC;
                ret = MTDSWAP_SCANNED_DIRTY;
        }

        return ret;
}

static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb,
                                u16 marker)
{
        struct mtdswap_oobdata n;
        int ret;
        loff_t offset;
        struct mtd_oob_ops ops;

        ops.ooboffs = 0;
        ops.oobbuf = (uint8_t *)&n;
        ops.mode = MTD_OPS_AUTO_OOB;
        ops.datbuf = NULL;

        if (marker == MTDSWAP_TYPE_CLEAN) {
                n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN);
                n.count = cpu_to_le32(eb->erase_count);
                ops.ooblen = MTDSWAP_OOBSIZE;
                offset = mtdswap_eb_offset(d, eb);
        } else {
                n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY);
                ops.ooblen = sizeof(n.magic);
                offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize;
        }

        ret = mtd_write_oob(d->mtd, offset, &ops);

        if (ret) {
                dev_warn(d->dev, "Write OOB failed for block at %08llx "
                        "error %d\n", offset, ret);
                if (ret == -EIO || mtd_is_eccerr(ret))
                        mtdswap_handle_write_error(d, eb);
                return ret;
        }

        if (ops.oobretlen != ops.ooblen) {
                dev_warn(d->dev, "Short OOB write for block at %08llx: "
                        "%zd not %zd\n",
                        offset, ops.oobretlen, ops.ooblen);
                return ret;
        }

        return 0;
}

/*
 * Are there any erase blocks without a MAGIC_CLEAN header, presumably
 * because power was cut off after the erase but before the header was
 * written? If so, we need to guesstimate the erase count.
 */
static void mtdswap_check_counts(struct mtdswap_dev *d)
{
        struct rb_root hist_root = RB_ROOT;
        struct rb_node *medrb;
        struct swap_eb *eb;
        unsigned int i, cnt, median;

        cnt = 0;
        for (i = 0; i < d->eblks; i++) {
                eb = d->eb_data + i;

                if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
                        continue;

                __mtdswap_rb_add(&hist_root, eb);
                cnt++;
        }

        if (cnt == 0)
                return;

        medrb = mtdswap_rb_index(&hist_root, cnt / 2);
        median = rb_entry(medrb, struct swap_eb, rb)->erase_count;

        d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root);

        for (i = 0; i < d->eblks; i++) {
                eb = d->eb_data + i;

                if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR))
                        eb->erase_count = median;

                if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
                        continue;

                rb_erase(&eb->rb, &hist_root);
        }
}

static void mtdswap_scan_eblks(struct mtdswap_dev *d)
{
        int status;
        unsigned int i, idx;
        struct swap_eb *eb;

        for (i = 0; i < d->eblks; i++) {
                eb = d->eb_data + i;

                status = mtdswap_read_markers(d, eb);
                if (status < 0)
                        eb->flags |= EBLOCK_READERR;
                else if (status == MTDSWAP_SCANNED_BAD) {
                        eb->flags |= EBLOCK_BAD;
                        continue;
                }

                switch (status) {
                case MTDSWAP_SCANNED_CLEAN:
                        idx = MTDSWAP_CLEAN;
                        break;
                case MTDSWAP_SCANNED_DIRTY:
                case MTDSWAP_SCANNED_BITFLIP:
                        idx = MTDSWAP_DIRTY;
                        break;
                default:
                        idx = MTDSWAP_FAILING;
                }

                eb->flags |= (idx << EBLOCK_IDX_SHIFT);
        }

        mtdswap_check_counts(d);

        for (i = 0; i < d->eblks; i++) {
                eb = d->eb_data + i;

                if (eb->flags & EBLOCK_BAD)
                        continue;

                idx = eb->flags >> EBLOCK_IDX_SHIFT;
                mtdswap_rb_add(d, eb, idx);
        }
}

/*
 * Place an eraseblock into the tree corresponding to the number of
 * active pages it contains.
 */
static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb)
{
        unsigned int weight = eb->active_count;
        unsigned int maxweight = d->pages_per_eblk;

        if (eb == d->curr_write)
                return;

        if (eb->flags & EBLOCK_BITFLIP)
                mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
        else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED))
                mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
        else if (weight == maxweight)
                mtdswap_rb_add(d, eb, MTDSWAP_USED);
        else if (weight == 0)
                mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
        else if (weight > (maxweight/2))
                mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG);
        else
                mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG);
}

static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb)
{
        struct mtd_info *mtd = d->mtd;
        struct erase_info erase;
        unsigned int retries = 0;
        int ret;

        eb->erase_count++;
        if (eb->erase_count > d->max_erase_count)
                d->max_erase_count = eb->erase_count;

retry:
        memset(&erase, 0, sizeof(struct erase_info));
        erase.addr      = mtdswap_eb_offset(d, eb);
        erase.len       = mtd->erasesize;

        ret = mtd_erase(mtd, &erase);
        if (ret) {
                if (retries++ < MTDSWAP_ERASE_RETRIES) {
                        dev_warn(d->dev,
                                "erase of erase block %#llx on %s failed",
                                erase.addr, mtd->name);
                        yield();
                        goto retry;
                }

                dev_err(d->dev, "Cannot erase erase block %#llx on %s\n",
                        erase.addr, mtd->name);

                mtdswap_handle_badblock(d, eb);
                return -EIO;
        }

        return 0;
}

static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page,
                                unsigned int *block)
{
        int ret;
        struct swap_eb *old_eb = d->curr_write;
        struct rb_root *clean_root;
        struct swap_eb *eb;

        if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) {
                do {
                        if (TREE_EMPTY(d, CLEAN))
                                return -ENOSPC;

                        clean_root = TREE_ROOT(d, CLEAN);
                        eb = rb_entry(rb_first(clean_root), struct swap_eb, rb);
                        rb_erase(&eb->rb, clean_root);
                        eb->root = NULL;
                        TREE_COUNT(d, CLEAN)--;

                        ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY);
                } while (ret == -EIO || mtd_is_eccerr(ret));

                if (ret)
                        return ret;

                d->curr_write_pos = 0;
                d->curr_write = eb;
                if (old_eb)
                        mtdswap_store_eb(d, old_eb);
        }

        *block = (d->curr_write - d->eb_data) * d->pages_per_eblk +
                d->curr_write_pos;

        d->curr_write->active_count++;
        d->revmap[*block] = page;
        d->curr_write_pos++;

        return 0;
}

static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d)
{
        return TREE_COUNT(d, CLEAN) * d->pages_per_eblk +
                d->pages_per_eblk - d->curr_write_pos;
}

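/*
 * "Enough" free pages means more than one full eraseblock's worth of
 * clean pages remains, so that a GC pass moving a complete eraseblock
 * cannot exhaust the clean space a foreground write depends on.
 */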
static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d)
{
        return mtdswap_free_page_cnt(d) > d->pages_per_eblk;
}

static int mtdswap_write_block(struct mtdswap_dev *d, char *buf,
                        unsigned int page, unsigned int *bp, int gc_context)
{
        struct mtd_info *mtd = d->mtd;
        struct swap_eb *eb;
        size_t retlen;
        loff_t writepos;
        int ret;

retry:
        if (!gc_context)
                while (!mtdswap_enough_free_pages(d))
                        if (mtdswap_gc(d, 0) > 0)
                                return -ENOSPC;

        ret = mtdswap_map_free_block(d, page, bp);
        eb = d->eb_data + (*bp / d->pages_per_eblk);

        if (ret == -EIO || mtd_is_eccerr(ret)) {
                d->curr_write = NULL;
                eb->active_count--;
                d->revmap[*bp] = PAGE_UNDEF;
                goto retry;
        }

        if (ret < 0)
                return ret;

        writepos = (loff_t)*bp << PAGE_SHIFT;
        ret = mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf);
        if (ret == -EIO || mtd_is_eccerr(ret)) {
                d->curr_write_pos--;
                eb->active_count--;
                d->revmap[*bp] = PAGE_UNDEF;
                mtdswap_handle_write_error(d, eb);
                goto retry;
        }

        if (ret < 0) {
                dev_err(d->dev, "Write to MTD device failed: %d (%zd written)",
                        ret, retlen);
                goto err;
        }

        if (retlen != PAGE_SIZE) {
                dev_err(d->dev, "Short write to MTD device: %zd written",
                        retlen);
                ret = -EIO;
                goto err;
        }

        return ret;

err:
        d->curr_write_pos--;
        eb->active_count--;
        d->revmap[*bp] = PAGE_UNDEF;

        return ret;
}

static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock,
                unsigned int *newblock)
{
        struct mtd_info *mtd = d->mtd;
        struct swap_eb *eb, *oldeb;
        int ret;
        size_t retlen;
        unsigned int page, retries;
        loff_t readpos;

        page = d->revmap[oldblock];
        readpos = (loff_t) oldblock << PAGE_SHIFT;
        retries = 0;

retry:
        ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);

        if (ret < 0 && !mtd_is_bitflip(ret)) {
                oldeb = d->eb_data + oldblock / d->pages_per_eblk;
                oldeb->flags |= EBLOCK_READERR;

                dev_err(d->dev, "Read Error: %d (block %u)\n", ret,
                        oldblock);
                retries++;
                if (retries < MTDSWAP_IO_RETRIES)
                        goto retry;

                goto read_error;
        }

        if (retlen != PAGE_SIZE) {
                dev_err(d->dev, "Short read: %zd (block %u)\n", retlen,
                       oldblock);
                ret = -EIO;
                goto read_error;
        }

        ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1);
        if (ret < 0) {
                d->page_data[page] = BLOCK_ERROR;
                dev_err(d->dev, "Write error: %d\n", ret);
                return ret;
        }

        d->page_data[page] = *newblock;
        d->revmap[oldblock] = PAGE_UNDEF;
        eb = d->eb_data + oldblock / d->pages_per_eblk;
        eb->active_count--;

        return 0;

read_error:
        d->page_data[page] = BLOCK_ERROR;
        d->revmap[oldblock] = PAGE_UNDEF;
        return ret;
}

static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb)
{
        unsigned int i, block, eblk_base, newblock;
        int ret, errcode;

        errcode = 0;
        eblk_base = (eb - d->eb_data) * d->pages_per_eblk;

        for (i = 0; i < d->pages_per_eblk; i++) {
                if (d->spare_eblks < MIN_SPARE_EBLOCKS)
                        return -ENOSPC;

                block = eblk_base + i;
                if (d->revmap[block] == PAGE_UNDEF)
                        continue;

                ret = mtdswap_move_block(d, block, &newblock);
                if (ret < 0 && !errcode)
                        errcode = ret;
        }

        return errcode;
}

static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d)
{
        int idx, stopat;

        if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_THRESHOLD)
                stopat = MTDSWAP_LOWFRAG;
        else
                stopat = MTDSWAP_HIFRAG;

        for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--)
                if (d->trees[idx].root.rb_node != NULL)
                        return idx;

        return -1;
}

static int mtdswap_wlfreq(unsigned int maxdiff)
{
        unsigned int h, x, y, dist, base;

        /*
         * Calculate a linear ramp down from f1 to f2 as maxdiff goes from
         * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE, i.e. a
         * triangle with height f1 - f2 and width COLLECT_NONDIRTY_BASE.
         */

        dist = maxdiff - MAX_ERASE_DIFF;
        if (dist > COLLECT_NONDIRTY_BASE)
                dist = COLLECT_NONDIRTY_BASE;

        /*
         * Model the slope as a right-angled triangle with base
         * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is
         * equal to the ratio h/base.
         */
        h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2;
        base = COLLECT_NONDIRTY_BASE;

        x = base - dist;        /* distance from the freq2 end of the ramp */
        y = (x * h + base / 2) / base;

        return COLLECT_NONDIRTY_FREQ2 + y;
}
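
/*
 * With the defaults above (h = 2, base = 4000): maxdiff = 4000 gives
 * dist = 0, x = 4000, y = 2, i.e. wear leveling on every 6th pick,
 * while maxdiff >= 8000 gives dist = 4000, x = 0, y = 0, i.e. every
 * 4th pick.
 */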

static int mtdswap_choose_wl_tree(struct mtdswap_dev *d)
{
        static unsigned int pick_cnt;
        unsigned int i, idx = -1, wear, max;
        struct rb_root *root;

        max = 0;
        for (i = 0; i <= MTDSWAP_DIRTY; i++) {
                root = &d->trees[i].root;
                if (root->rb_node == NULL)
                        continue;

                wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root);
                if (wear > max) {
                        max = wear;
                        idx = i;
                }
        }

        if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) {
                pick_cnt = 0;
                return idx;
        }

        pick_cnt++;
        return -1;
}

static int mtdswap_choose_gc_tree(struct mtdswap_dev *d,
                                unsigned int background)
{
        int idx;

        if (TREE_NONEMPTY(d, FAILING) &&
                (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY))))
                return MTDSWAP_FAILING;

        idx = mtdswap_choose_wl_tree(d);
        if (idx >= MTDSWAP_CLEAN)
                return idx;

        return __mtdswap_choose_gc_tree(d);
}

static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d,
                                        unsigned int background)
{
        struct rb_root *rp = NULL;
        struct swap_eb *eb = NULL;
        int idx;

        if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD &&
                TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING))
                return NULL;

        idx = mtdswap_choose_gc_tree(d, background);
        if (idx < 0)
                return NULL;

        rp = &d->trees[idx].root;
        eb = rb_entry(rb_first(rp), struct swap_eb, rb);

        rb_erase(&eb->rb, rp);
        eb->root = NULL;
        d->trees[idx].count--;
        return eb;
}

static unsigned int mtdswap_test_patt(unsigned int i)
{
        return i % 2 ? 0x55555555 : 0xAAAAAAAA;
}
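
/*
 * Alternating 0x55/0xAA fill patterns drive every bit of a page to
 * both states across the two write/read/erase passes below, which is
 * how mtdswap_eblk_passes() decides whether a block that saw a read
 * error is still usable.
 */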

static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
                                        struct swap_eb *eb)
{
        struct mtd_info *mtd = d->mtd;
        unsigned int test, i, j, patt, mtd_pages;
        loff_t base, pos;
        unsigned int *p1 = (unsigned int *)d->page_buf;
        unsigned char *p2 = (unsigned char *)d->oob_buf;
        struct mtd_oob_ops ops;
        int ret;

        ops.mode = MTD_OPS_AUTO_OOB;
        ops.len = mtd->writesize;
        ops.ooblen = mtd->oobavail;
        ops.ooboffs = 0;
        ops.datbuf = d->page_buf;
        ops.oobbuf = d->oob_buf;
        base = mtdswap_eb_offset(d, eb);
        mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize;

        for (test = 0; test < 2; test++) {
                pos = base;
                for (i = 0; i < mtd_pages; i++) {
                        patt = mtdswap_test_patt(test + i);
                        memset(d->page_buf, patt, mtd->writesize);
                        memset(d->oob_buf, patt, mtd->oobavail);
                        ret = mtd_write_oob(mtd, pos, &ops);
                        if (ret)
                                goto error;

                        pos += mtd->writesize;
                }

                pos = base;
                for (i = 0; i < mtd_pages; i++) {
                        ret = mtd_read_oob(mtd, pos, &ops);
                        if (ret)
                                goto error;

                        patt = mtdswap_test_patt(test + i);
                        for (j = 0; j < mtd->writesize/sizeof(int); j++)
                                if (p1[j] != patt)
                                        goto error;

                        for (j = 0; j < mtd->oobavail; j++)
                                if (p2[j] != (unsigned char)patt)
                                        goto error;

                        pos += mtd->writesize;
                }

                ret = mtdswap_erase_block(d, eb);
                if (ret)
                        goto error;
        }

        eb->flags &= ~EBLOCK_READERR;
        return 1;

error:
        mtdswap_handle_badblock(d, eb);
        return 0;
}

static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background)
{
        struct swap_eb *eb;
        int ret;

        if (d->spare_eblks < MIN_SPARE_EBLOCKS)
                return 1;

        eb = mtdswap_pick_gc_eblk(d, background);
        if (!eb)
                return 1;

        ret = mtdswap_gc_eblock(d, eb);
        if (ret == -ENOSPC)
                return 1;

        if (eb->flags & EBLOCK_FAILED) {
                mtdswap_handle_badblock(d, eb);
                return 0;
        }

        eb->flags &= ~EBLOCK_BITFLIP;
        ret = mtdswap_erase_block(d, eb);
        if ((eb->flags & EBLOCK_READERR) &&
                (ret || !mtdswap_eblk_passes(d, eb)))
                return 0;

        if (ret == 0)
                ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN);

        if (ret == 0)
                mtdswap_rb_add(d, eb, MTDSWAP_CLEAN);
        else if (ret != -EIO && !mtd_is_eccerr(ret))
                mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);

        return 0;
}

static void mtdswap_background(struct mtd_blktrans_dev *dev)
{
        struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
        int ret;

        while (1) {
                ret = mtdswap_gc(d, 1);
                if (ret || mtd_blktrans_cease_background(dev))
                        return;
        }
}

static void mtdswap_cleanup(struct mtdswap_dev *d)
{
        vfree(d->eb_data);
        vfree(d->revmap);
        vfree(d->page_data);
        kfree(d->oob_buf);
        kfree(d->page_buf);
}

static int mtdswap_flush(struct mtd_blktrans_dev *dev)
{
        struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);

        mtd_sync(d->mtd);
        return 0;
}

static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
{
        loff_t offset;
        unsigned int badcnt;

        badcnt = 0;

        if (mtd_can_have_bb(mtd))
                for (offset = 0; offset < size; offset += mtd->erasesize)
                        if (mtd_block_isbad(mtd, offset))
                                badcnt++;

        return badcnt;
}

static int mtdswap_writesect(struct mtd_blktrans_dev *dev,
                        unsigned long page, char *buf)
{
        struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
        unsigned int newblock, mapped;
        struct swap_eb *eb;
        int ret;

        d->sect_write_count++;

        if (d->spare_eblks < MIN_SPARE_EBLOCKS)
                return -ENOSPC;

        if (header) {
                /* Ignore writes to the header page */
                if (unlikely(page == 0))
                        return 0;

                page--;
        }

        mapped = d->page_data[page];
        if (mapped <= BLOCK_MAX) {
                eb = d->eb_data + (mapped / d->pages_per_eblk);
                eb->active_count--;
                mtdswap_store_eb(d, eb);
                d->page_data[page] = BLOCK_UNDEF;
                d->revmap[mapped] = PAGE_UNDEF;
        }

        ret = mtdswap_write_block(d, buf, page, &newblock, 0);
        d->mtd_write_count++;

        if (ret < 0)
                return ret;

        d->page_data[page] = newblock;

        return 0;
}

/* Provide a dummy swap header for the kernel */
static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf)
{
        union swap_header *hd = (union swap_header *)(buf);

        memset(buf, 0, PAGE_SIZE - 10);

        hd->info.version = 1;
        hd->info.last_page = d->mbd_dev->size - 1;
        hd->info.nr_badpages = 0;

        memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10);

        return 0;
}

static int mtdswap_readsect(struct mtd_blktrans_dev *dev,
                        unsigned long page, char *buf)
{
        struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
        struct mtd_info *mtd = d->mtd;
        unsigned int realblock, retries;
        loff_t readpos;
        struct swap_eb *eb;
        size_t retlen;
        int ret;

        d->sect_read_count++;

        if (header) {
                if (unlikely(page == 0))
                        return mtdswap_auto_header(d, buf);

                page--;
        }

        realblock = d->page_data[page];
        if (realblock > BLOCK_MAX) {
                memset(buf, 0x0, PAGE_SIZE);
                if (realblock == BLOCK_UNDEF)
                        return 0;
                else
                        return -EIO;
        }

        eb = d->eb_data + (realblock / d->pages_per_eblk);
        BUG_ON(d->revmap[realblock] == PAGE_UNDEF);

        readpos = (loff_t)realblock << PAGE_SHIFT;
        retries = 0;

retry:
        ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf);

        d->mtd_read_count++;
        if (mtd_is_bitflip(ret)) {
                eb->flags |= EBLOCK_BITFLIP;
                mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
                ret = 0;
        }

        if (ret < 0) {
                dev_err(d->dev, "Read error %d\n", ret);
                eb->flags |= EBLOCK_READERR;
                mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
                retries++;
                if (retries < MTDSWAP_IO_RETRIES)
                        goto retry;

                return ret;
        }

        if (retlen != PAGE_SIZE) {
                dev_err(d->dev, "Short read %zd\n", retlen);
                return -EIO;
        }

        return 0;
}

static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first,
                        unsigned nr_pages)
{
        struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
        unsigned long page;
        struct swap_eb *eb;
        unsigned int mapped;

        d->discard_count++;

        for (page = first; page < first + nr_pages; page++) {
                mapped = d->page_data[page];
                if (mapped <= BLOCK_MAX) {
                        eb = d->eb_data + (mapped / d->pages_per_eblk);
                        eb->active_count--;
                        mtdswap_store_eb(d, eb);
                        d->page_data[page] = BLOCK_UNDEF;
                        d->revmap[mapped] = PAGE_UNDEF;
                        d->discard_page_count++;
                } else if (mapped == BLOCK_ERROR) {
                        d->page_data[page] = BLOCK_UNDEF;
                        d->discard_page_count++;
                }
        }

        return 0;
}
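
/*
 * Discard is the cheap reclaim path: the swap core tells mtdswap which
 * pages are stale, their mappings are dropped above without any flash
 * I/O, and the affected blocks become dirty space for GC to erase
 * later.
 */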

static int mtdswap_show(struct seq_file *s, void *data)
{
        struct mtdswap_dev *d = (struct mtdswap_dev *) s->private;
        unsigned long sum;
        unsigned int count[MTDSWAP_TREE_CNT];
        unsigned int min[MTDSWAP_TREE_CNT];
        unsigned int max[MTDSWAP_TREE_CNT];
        unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages;
        uint64_t use_size;
        static const char * const name[] = {
                "clean", "used", "low", "high", "dirty", "bitflip", "failing"
        };

        mutex_lock(&d->mbd_dev->lock);

        for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
                struct rb_root *root = &d->trees[i].root;

                if (root->rb_node) {
                        count[i] = d->trees[i].count;
                        min[i] = MTDSWAP_ECNT_MIN(root);
                        max[i] = MTDSWAP_ECNT_MAX(root);
                } else
                        count[i] = 0;
        }

        if (d->curr_write) {
                cw = 1;
                cwp = d->curr_write_pos;
                cwecount = d->curr_write->erase_count;
        }

        sum = 0;
        for (i = 0; i < d->eblks; i++)
                sum += d->eb_data[i].erase_count;

        use_size = (uint64_t)d->eblks * d->mtd->erasesize;
        bb_cnt = mtdswap_badblocks(d->mtd, use_size);

        mapped = 0;
        pages = d->mbd_dev->size;
        for (i = 0; i < pages; i++)
                if (d->page_data[i] != BLOCK_UNDEF)
                        mapped++;

        mutex_unlock(&d->mbd_dev->lock);

        for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
                if (!count[i])
                        continue;

                if (min[i] != max[i])
                        seq_printf(s, "%s:\t%5d erase blocks, erased min %d, "
                                "max %d times\n",
                                name[i], count[i], min[i], max[i]);
                else
                        seq_printf(s, "%s:\t%5d erase blocks, all erased %d "
                                "times\n", name[i], count[i], min[i]);
        }

        if (bb_cnt)
                seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt);

        if (cw)
                seq_printf(s, "current erase block: %u pages used, %u free, "
                        "erased %u times\n",
                        cwp, d->pages_per_eblk - cwp, cwecount);

        seq_printf(s, "total erasures: %lu\n", sum);

        seq_puts(s, "\n");

        seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count);
        seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count);
        seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count);
        seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count);
        seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count);
        seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count);

        seq_puts(s, "\n");
        seq_printf(s, "total pages: %u\n", pages);
        seq_printf(s, "pages mapped: %u\n", mapped);

        return 0;
}
DEFINE_SHOW_ATTRIBUTE(mtdswap);

static int mtdswap_add_debugfs(struct mtdswap_dev *d)
{
        struct dentry *root = d->mtd->dbg.dfs_dir;
        struct dentry *dent;

        if (!IS_ENABLED(CONFIG_DEBUG_FS))
                return 0;

        if (IS_ERR_OR_NULL(root))
                return -1;

        dent = debugfs_create_file("mtdswap_stats", S_IRUSR, root, d,
                                &mtdswap_fops);
        if (!dent) {
                dev_err(d->dev, "debugfs_create_file failed\n");
                return -1;
        }

        return 0;
}

static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks,
                        unsigned int spare_cnt)
{
        struct mtd_info *mtd = d->mbd_dev->mtd;
        unsigned int i, eblk_bytes, pages, blocks;
        int ret = -ENOMEM;

        d->mtd = mtd;
        d->eblks = eblocks;
        d->spare_eblks = spare_cnt;
        d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT;

        pages = d->mbd_dev->size;
        blocks = eblocks * d->pages_per_eblk;

        for (i = 0; i < MTDSWAP_TREE_CNT; i++)
                d->trees[i].root = RB_ROOT;

        d->page_data = vmalloc(array_size(pages, sizeof(int)));
        if (!d->page_data)
                goto page_data_fail;

        d->revmap = vmalloc(array_size(blocks, sizeof(int)));
        if (!d->revmap)
                goto revmap_fail;

        eblk_bytes = sizeof(struct swap_eb) * d->eblks;
        d->eb_data = vzalloc(eblk_bytes);
        if (!d->eb_data)
                goto eb_data_fail;

        for (i = 0; i < pages; i++)
                d->page_data[i] = BLOCK_UNDEF;

        for (i = 0; i < blocks; i++)
                d->revmap[i] = PAGE_UNDEF;

        d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
        if (!d->page_buf)
                goto page_buf_fail;

        d->oob_buf = kmalloc_array(2, mtd->oobavail, GFP_KERNEL);
        if (!d->oob_buf)
                goto oob_buf_fail;

        mtdswap_scan_eblks(d);

        return 0;

oob_buf_fail:
        kfree(d->page_buf);
page_buf_fail:
        vfree(d->eb_data);
eb_data_fail:
        vfree(d->revmap);
revmap_fail:
        vfree(d->page_data);
page_data_fail:
        printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret);
        return ret;
}

static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
{
        struct mtdswap_dev *d;
        struct mtd_blktrans_dev *mbd_dev;
        char *parts;
        char *this_opt;
        unsigned long part;
        unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
        uint64_t swap_size, use_size, size_limit;
        int ret;

        parts = &partitions[0];
        if (!*parts)
                return;

        while ((this_opt = strsep(&parts, ",")) != NULL) {
                if (kstrtoul(this_opt, 0, &part) < 0)
                        return;

                if (mtd->index == part)
                        break;
        }

        if (mtd->index != part)
                return;

        if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) {
                printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE "
                        "%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE);
                return;
        }

        if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) {
                printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size"
                        " %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize);
                return;
        }

        if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) {
                printk(KERN_ERR "%s: Not enough free bytes in OOB, "
                        "%d available, %zu needed.\n",
                        MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE);
                return;
        }

        if (spare_eblocks > 100)
                spare_eblocks = 100;

        use_size = mtd->size;
        size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE;

        if (mtd->size > size_limit) {
                printk(KERN_WARNING "%s: Device too large. Limiting size to "
                        "%llu bytes\n", MTDSWAP_PREFIX, size_limit);
                use_size = size_limit;
        }

        eblocks = mtd_div_by_eb(use_size, mtd);
        use_size = (uint64_t)eblocks * mtd->erasesize;
        bad_blocks = mtdswap_badblocks(mtd, use_size);
        eavailable = eblocks - bad_blocks;

        if (eavailable < MIN_ERASE_BLOCKS) {
                printk(KERN_ERR "%s: Not enough erase blocks. %u available, "
                        "%d needed\n", MTDSWAP_PREFIX, eavailable,
                        MIN_ERASE_BLOCKS);
                return;
        }

        spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100);

        if (spare_cnt < MIN_SPARE_EBLOCKS)
                spare_cnt = MIN_SPARE_EBLOCKS;

        if (spare_cnt > eavailable - 1)
                spare_cnt = eavailable - 1;

        swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize +
                (header ? PAGE_SIZE : 0);

        printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, "
                "%u spare, %u bad blocks\n",
                MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks);

        d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL);
        if (!d)
                return;

        mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL);
        if (!mbd_dev) {
                kfree(d);
                return;
        }

        d->mbd_dev = mbd_dev;
        mbd_dev->priv = d;

        mbd_dev->mtd = mtd;
        mbd_dev->devnum = mtd->index;
        mbd_dev->size = swap_size >> PAGE_SHIFT;
        mbd_dev->tr = tr;

        if (!(mtd->flags & MTD_WRITEABLE))
                mbd_dev->readonly = 1;

        if (mtdswap_init(d, eblocks, spare_cnt) < 0)
                goto init_failed;

        if (add_mtd_blktrans_dev(mbd_dev) < 0)
                goto cleanup;

        d->dev = disk_to_dev(mbd_dev->disk);

        ret = mtdswap_add_debugfs(d);
        if (ret < 0)
                goto debugfs_failed;

        return;

debugfs_failed:
        del_mtd_blktrans_dev(mbd_dev);

cleanup:
        mtdswap_cleanup(d);

init_failed:
        kfree(mbd_dev);
        kfree(d);
}

static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev)
{
        struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);

        del_mtd_blktrans_dev(dev);
        mtdswap_cleanup(d);
        kfree(d);
}

static struct mtd_blktrans_ops mtdswap_ops = {
        .name           = "mtdswap",
        .major          = 0,
        .part_bits      = 0,
        .blksize        = PAGE_SIZE,
        .flush          = mtdswap_flush,
        .readsect       = mtdswap_readsect,
        .writesect      = mtdswap_writesect,
        .discard        = mtdswap_discard,
        .background     = mtdswap_background,
        .add_mtd        = mtdswap_add_mtd,
        .remove_dev     = mtdswap_remove_dev,
        .owner          = THIS_MODULE,
};

static int __init mtdswap_modinit(void)
{
        return register_mtd_blktrans(&mtdswap_ops);
}

static void __exit mtdswap_modexit(void)
{
        deregister_mtd_blktrans(&mtdswap_ops);
}

module_init(mtdswap_modinit);
module_exit(mtdswap_modexit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
MODULE_DESCRIPTION("Block device access to an MTD suitable for use as "
                "swap space");
