drivers/block/zram/zram_drv.c


DEFINITIONS

This source file includes the following definitions.
  1. zram_slot_trylock
  2. zram_slot_lock
  3. zram_slot_unlock
  4. init_done
  5. dev_to_zram
  6. zram_get_handle
  7. zram_set_handle
  8. zram_test_flag
  9. zram_set_flag
  10. zram_clear_flag
  11. zram_set_element
  12. zram_get_element
  13. zram_get_obj_size
  14. zram_set_obj_size
  15. zram_allocated
  16. is_partial_io
  17. is_partial_io
  18. valid_io_request
  19. update_position
  20. update_used_max
  21. zram_fill_page
  22. page_same_filled
  23. initstate_show
  24. disksize_show
  25. mem_limit_store
  26. mem_used_max_store
  27. idle_store
  28. writeback_limit_enable_store
  29. writeback_limit_enable_show
  30. writeback_limit_store
  31. writeback_limit_show
  32. reset_bdev
  33. backing_dev_show
  34. backing_dev_store
  35. alloc_block_bdev
  36. free_block_bdev
  37. zram_page_end_io
  38. read_from_bdev_async
  39. writeback_store
  40. zram_sync_read
  41. read_from_bdev_sync
  42. read_from_bdev_sync
  43. read_from_bdev
  44. reset_bdev
  45. read_from_bdev
  46. free_block_bdev
  47. zram_debugfs_create
  48. zram_debugfs_destroy
  49. zram_accessed
  50. read_block_state
  51. zram_debugfs_register
  52. zram_debugfs_unregister
  53. zram_debugfs_create
  54. zram_debugfs_destroy
  55. zram_accessed
  56. zram_debugfs_register
  57. zram_debugfs_unregister
  58. max_comp_streams_show
  59. max_comp_streams_store
  60. comp_algorithm_show
  61. comp_algorithm_store
  62. compact_store
  63. io_stat_show
  64. mm_stat_show
  65. bd_stat_show
  66. debug_stat_show
  67. zram_meta_free
  68. zram_meta_alloc
  69. zram_free_page
  70. __zram_bvec_read
  71. zram_bvec_read
  72. __zram_bvec_write
  73. zram_bvec_write
  74. zram_bio_discard
  75. zram_bvec_rw
  76. __zram_make_request
  77. zram_make_request
  78. zram_slot_free_notify
  79. zram_rw_page
  80. zram_reset_device
  81. disksize_store
  82. reset_store
  83. zram_open
  84. zram_add
  85. zram_remove
  86. hot_add_show
  87. hot_remove_store
  88. zram_remove_cb
  89. destroy_devices
  90. zram_init
  91. zram_exit

   1 /*
   2  * Compressed RAM block device
   3  *
   4  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
   5  *               2012, 2013 Minchan Kim
   6  *
   7  * This code is released using a dual license strategy: BSD/GPL
   8  * You can choose the licence that better fits your requirements.
   9  *
  10  * Released under the terms of 3-clause BSD License
  11  * Released under the terms of GNU General Public License Version 2.0
  12  *
  13  */
  14 
  15 #define KMSG_COMPONENT "zram"
  16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  17 
  18 #include <linux/module.h>
  19 #include <linux/kernel.h>
  20 #include <linux/bio.h>
  21 #include <linux/bitops.h>
  22 #include <linux/blkdev.h>
  23 #include <linux/buffer_head.h>
  24 #include <linux/device.h>
  25 #include <linux/genhd.h>
  26 #include <linux/highmem.h>
  27 #include <linux/slab.h>
  28 #include <linux/backing-dev.h>
  29 #include <linux/string.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/err.h>
  32 #include <linux/idr.h>
  33 #include <linux/sysfs.h>
  34 #include <linux/debugfs.h>
  35 #include <linux/cpuhotplug.h>
  36 
  37 #include "zram_drv.h"
  38 
  39 static DEFINE_IDR(zram_index_idr);
  40 /* idr index must be protected */
  41 static DEFINE_MUTEX(zram_index_mutex);
  42 
  43 static int zram_major;
  44 static const char *default_compressor = "lzo-rle";
  45 
  46 /* Module params (documentation at end) */
  47 static unsigned int num_devices = 1;
  48 /*
   49  * Pages that compress to sizes equal to or greater than this are stored
  50  * uncompressed in memory.
  51  */
  52 static size_t huge_class_size;
  53 
  54 static void zram_free_page(struct zram *zram, size_t index);
  55 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
  56                                 u32 index, int offset, struct bio *bio);
  57 
  58 
  59 static int zram_slot_trylock(struct zram *zram, u32 index)
  60 {
  61         return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
  62 }
  63 
  64 static void zram_slot_lock(struct zram *zram, u32 index)
  65 {
  66         bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
  67 }
  68 
  69 static void zram_slot_unlock(struct zram *zram, u32 index)
  70 {
  71         bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
  72 }
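
/*
 * ZRAM_LOCK is a bit inside each slot's flags word, so locking a slot needs
 * no storage beyond the flags themselves; bit_spin_lock() simply spins on
 * that bit.  A minimal stand-alone user-space sketch of the same idea using
 * C11 atomics (all names below are illustrative, not kernel API):
 */

#include <stdatomic.h>
#include <stdio.h>

#define SLOT_LOCK_BIT 0

static _Atomic unsigned long slot_flags;

static void slot_lock(void)
{
	/* Spin until we are the one who flips the lock bit from 0 to 1. */
	while (atomic_fetch_or(&slot_flags, 1UL << SLOT_LOCK_BIT) &
	       (1UL << SLOT_LOCK_BIT))
		;
}

static void slot_unlock(void)
{
	atomic_fetch_and(&slot_flags, ~(1UL << SLOT_LOCK_BIT));
}

int main(void)
{
	slot_lock();
	/* ... modify the slot's state bits while holding its lock ... */
	slot_unlock();
	printf("flags after unlock: %lx\n",
	       (unsigned long)atomic_load(&slot_flags));
	return 0;
}
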
  73 
  74 static inline bool init_done(struct zram *zram)
  75 {
  76         return zram->disksize;
  77 }
  78 
  79 static inline struct zram *dev_to_zram(struct device *dev)
  80 {
  81         return (struct zram *)dev_to_disk(dev)->private_data;
  82 }
  83 
  84 static unsigned long zram_get_handle(struct zram *zram, u32 index)
  85 {
  86         return zram->table[index].handle;
  87 }
  88 
  89 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
  90 {
  91         zram->table[index].handle = handle;
  92 }
  93 
   94 /* flag operations require the table entry's bit_spin_lock() to be held */
  95 static bool zram_test_flag(struct zram *zram, u32 index,
  96                         enum zram_pageflags flag)
  97 {
  98         return zram->table[index].flags & BIT(flag);
  99 }
 100 
 101 static void zram_set_flag(struct zram *zram, u32 index,
 102                         enum zram_pageflags flag)
 103 {
 104         zram->table[index].flags |= BIT(flag);
 105 }
 106 
 107 static void zram_clear_flag(struct zram *zram, u32 index,
 108                         enum zram_pageflags flag)
 109 {
 110         zram->table[index].flags &= ~BIT(flag);
 111 }
 112 
 113 static inline void zram_set_element(struct zram *zram, u32 index,
 114                         unsigned long element)
 115 {
 116         zram->table[index].element = element;
 117 }
 118 
 119 static unsigned long zram_get_element(struct zram *zram, u32 index)
 120 {
 121         return zram->table[index].element;
 122 }
 123 
 124 static size_t zram_get_obj_size(struct zram *zram, u32 index)
 125 {
 126         return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
 127 }
 128 
 129 static void zram_set_obj_size(struct zram *zram,
 130                                         u32 index, size_t size)
 131 {
 132         unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
 133 
 134         zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
 135 }
 136 
 137 static inline bool zram_allocated(struct zram *zram, u32 index)
 138 {
 139         return zram_get_obj_size(zram, index) ||
 140                         zram_test_flag(zram, index, ZRAM_SAME) ||
 141                         zram_test_flag(zram, index, ZRAM_WB);
 142 }
 143 
 144 #if PAGE_SIZE != 4096
 145 static inline bool is_partial_io(struct bio_vec *bvec)
 146 {
 147         return bvec->bv_len != PAGE_SIZE;
 148 }
 149 #else
 150 static inline bool is_partial_io(struct bio_vec *bvec)
 151 {
 152         return false;
 153 }
 154 #endif
 155 
 156 /*
 157  * Check if request is within bounds and aligned on zram logical blocks.
 158  */
 159 static inline bool valid_io_request(struct zram *zram,
 160                 sector_t start, unsigned int size)
 161 {
 162         u64 end, bound;
 163 
 164         /* unaligned request */
 165         if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
 166                 return false;
 167         if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
 168                 return false;
 169 
 170         end = start + (size >> SECTOR_SHIFT);
 171         bound = zram->disksize >> SECTOR_SHIFT;
  172         /* out of range */
 173         if (unlikely(start >= bound || end > bound || start > end))
 174                 return false;
 175 
 176         /* I/O request is valid */
 177         return true;
 178 }
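
/*
 * valid_io_request() only accepts requests that start on a logical block
 * boundary, cover a whole number of logical blocks and fit inside the disk.
 * A minimal stand-alone sketch of the same arithmetic, assuming 512-byte
 * sectors and 4 KiB logical blocks (the constants and names below are
 * illustrative):
 */

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_SECTOR_SHIFT       9
#define DEMO_LOGICAL_BLOCK_SIZE 4096    /* bytes */
#define DEMO_SECTORS_PER_BLOCK  (DEMO_LOGICAL_BLOCK_SIZE >> DEMO_SECTOR_SHIFT)

/* Same checks as valid_io_request(), on plain integers. */
static bool demo_valid(uint64_t start, uint32_t size, uint64_t disksize)
{
	uint64_t end, bound;

	if (start & (DEMO_SECTORS_PER_BLOCK - 1))
		return false;           /* start not aligned to a 4K block */
	if (size & (DEMO_LOGICAL_BLOCK_SIZE - 1))
		return false;           /* length not a multiple of 4K */

	end = start + (size >> DEMO_SECTOR_SHIFT);
	bound = disksize >> DEMO_SECTOR_SHIFT;
	return start < bound && end <= bound;
}

int main(void)
{
	/* 1 MiB disk: sector 8 + 4 KiB is valid, sector 9 is unaligned. */
	printf("%d\n", demo_valid(8, 4096, 1 << 20));   /* 1 */
	printf("%d\n", demo_valid(9, 4096, 1 << 20));   /* 0 */
	return 0;
}
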
 179 
 180 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
 181 {
 182         *index  += (*offset + bvec->bv_len) / PAGE_SIZE;
 183         *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
 184 }
 185 
 186 static inline void update_used_max(struct zram *zram,
 187                                         const unsigned long pages)
 188 {
 189         unsigned long old_max, cur_max;
 190 
 191         old_max = atomic_long_read(&zram->stats.max_used_pages);
 192 
 193         do {
 194                 cur_max = old_max;
 195                 if (pages > cur_max)
 196                         old_max = atomic_long_cmpxchg(
 197                                 &zram->stats.max_used_pages, cur_max, pages);
 198         } while (old_max != cur_max);
 199 }
 200 
 201 static inline void zram_fill_page(void *ptr, unsigned long len,
 202                                         unsigned long value)
 203 {
 204         WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
 205         memset_l(ptr, value, len / sizeof(unsigned long));
 206 }
 207 
 208 static bool page_same_filled(void *ptr, unsigned long *element)
 209 {
 210         unsigned int pos;
 211         unsigned long *page;
 212         unsigned long val;
 213 
 214         page = (unsigned long *)ptr;
 215         val = page[0];
 216 
 217         for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
 218                 if (val != page[pos])
 219                         return false;
 220         }
 221 
 222         *element = val;
 223 
 224         return true;
 225 }
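
/*
 * page_same_filled() scans the page as an array of unsigned long and, if
 * every word matches, reports the repeating value through *element;
 * zram_fill_page() reconstructs such a page on read without storing any
 * compressed data.  A minimal stand-alone user-space sketch of the same
 * check, assuming a 4 KiB page (names below are illustrative):
 */

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define DEMO_PAGE_SIZE 4096

static bool demo_same_filled(const unsigned long *page, unsigned long *element)
{
	unsigned long val = page[0];
	size_t pos;

	for (pos = 1; pos < DEMO_PAGE_SIZE / sizeof(*page); pos++)
		if (page[pos] != val)
			return false;
	*element = val;
	return true;
}

int main(void)
{
	unsigned long page[DEMO_PAGE_SIZE / sizeof(unsigned long)];
	unsigned long elem;

	memset(page, 0, sizeof(page));
	printf("%d\n", demo_same_filled(page, &elem));  /* 1: zero-filled */
	page[17] = 42;
	printf("%d\n", demo_same_filled(page, &elem));  /* 0: not same-filled */
	return 0;
}
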
 226 
 227 static ssize_t initstate_show(struct device *dev,
 228                 struct device_attribute *attr, char *buf)
 229 {
 230         u32 val;
 231         struct zram *zram = dev_to_zram(dev);
 232 
 233         down_read(&zram->init_lock);
 234         val = init_done(zram);
 235         up_read(&zram->init_lock);
 236 
 237         return scnprintf(buf, PAGE_SIZE, "%u\n", val);
 238 }
 239 
 240 static ssize_t disksize_show(struct device *dev,
 241                 struct device_attribute *attr, char *buf)
 242 {
 243         struct zram *zram = dev_to_zram(dev);
 244 
 245         return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
 246 }
 247 
 248 static ssize_t mem_limit_store(struct device *dev,
 249                 struct device_attribute *attr, const char *buf, size_t len)
 250 {
 251         u64 limit;
 252         char *tmp;
 253         struct zram *zram = dev_to_zram(dev);
 254 
 255         limit = memparse(buf, &tmp);
 256         if (buf == tmp) /* no chars parsed, invalid input */
 257                 return -EINVAL;
 258 
 259         down_write(&zram->init_lock);
 260         zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
 261         up_write(&zram->init_lock);
 262 
 263         return len;
 264 }
 265 
 266 static ssize_t mem_used_max_store(struct device *dev,
 267                 struct device_attribute *attr, const char *buf, size_t len)
 268 {
 269         int err;
 270         unsigned long val;
 271         struct zram *zram = dev_to_zram(dev);
 272 
 273         err = kstrtoul(buf, 10, &val);
 274         if (err || val != 0)
 275                 return -EINVAL;
 276 
 277         down_read(&zram->init_lock);
 278         if (init_done(zram)) {
 279                 atomic_long_set(&zram->stats.max_used_pages,
 280                                 zs_get_total_pages(zram->mem_pool));
 281         }
 282         up_read(&zram->init_lock);
 283 
 284         return len;
 285 }
 286 
 287 static ssize_t idle_store(struct device *dev,
 288                 struct device_attribute *attr, const char *buf, size_t len)
 289 {
 290         struct zram *zram = dev_to_zram(dev);
 291         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
 292         int index;
 293 
 294         if (!sysfs_streq(buf, "all"))
 295                 return -EINVAL;
 296 
 297         down_read(&zram->init_lock);
 298         if (!init_done(zram)) {
 299                 up_read(&zram->init_lock);
 300                 return -EINVAL;
 301         }
 302 
 303         for (index = 0; index < nr_pages; index++) {
 304                 /*
 305                  * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
 306                  * See the comment in writeback_store.
 307                  */
 308                 zram_slot_lock(zram, index);
 309                 if (zram_allocated(zram, index) &&
 310                                 !zram_test_flag(zram, index, ZRAM_UNDER_WB))
 311                         zram_set_flag(zram, index, ZRAM_IDLE);
 312                 zram_slot_unlock(zram, index);
 313         }
 314 
 315         up_read(&zram->init_lock);
 316 
 317         return len;
 318 }
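
/*
 * idle_store() is the sysfs handler behind the "idle" attribute: writing
 * "all" marks every allocated, not-under-writeback slot ZRAM_IDLE.  A
 * minimal user-space sketch of driving it, assuming the device is zram0:
 */

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/block/zram0/idle", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "all", 3) != 3)
		perror("write");
	close(fd);
	return 0;
}
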
 319 
 320 #ifdef CONFIG_ZRAM_WRITEBACK
 321 static ssize_t writeback_limit_enable_store(struct device *dev,
 322                 struct device_attribute *attr, const char *buf, size_t len)
 323 {
 324         struct zram *zram = dev_to_zram(dev);
 325         u64 val;
 326         ssize_t ret = -EINVAL;
 327 
 328         if (kstrtoull(buf, 10, &val))
 329                 return ret;
 330 
 331         down_read(&zram->init_lock);
 332         spin_lock(&zram->wb_limit_lock);
 333         zram->wb_limit_enable = val;
 334         spin_unlock(&zram->wb_limit_lock);
 335         up_read(&zram->init_lock);
 336         ret = len;
 337 
 338         return ret;
 339 }
 340 
 341 static ssize_t writeback_limit_enable_show(struct device *dev,
 342                 struct device_attribute *attr, char *buf)
 343 {
 344         bool val;
 345         struct zram *zram = dev_to_zram(dev);
 346 
 347         down_read(&zram->init_lock);
 348         spin_lock(&zram->wb_limit_lock);
 349         val = zram->wb_limit_enable;
 350         spin_unlock(&zram->wb_limit_lock);
 351         up_read(&zram->init_lock);
 352 
 353         return scnprintf(buf, PAGE_SIZE, "%d\n", val);
 354 }
 355 
 356 static ssize_t writeback_limit_store(struct device *dev,
 357                 struct device_attribute *attr, const char *buf, size_t len)
 358 {
 359         struct zram *zram = dev_to_zram(dev);
 360         u64 val;
 361         ssize_t ret = -EINVAL;
 362 
 363         if (kstrtoull(buf, 10, &val))
 364                 return ret;
 365 
 366         down_read(&zram->init_lock);
 367         spin_lock(&zram->wb_limit_lock);
 368         zram->bd_wb_limit = val;
 369         spin_unlock(&zram->wb_limit_lock);
 370         up_read(&zram->init_lock);
 371         ret = len;
 372 
 373         return ret;
 374 }
 375 
 376 static ssize_t writeback_limit_show(struct device *dev,
 377                 struct device_attribute *attr, char *buf)
 378 {
 379         u64 val;
 380         struct zram *zram = dev_to_zram(dev);
 381 
 382         down_read(&zram->init_lock);
 383         spin_lock(&zram->wb_limit_lock);
 384         val = zram->bd_wb_limit;
 385         spin_unlock(&zram->wb_limit_lock);
 386         up_read(&zram->init_lock);
 387 
 388         return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
 389 }
 390 
 391 static void reset_bdev(struct zram *zram)
 392 {
 393         struct block_device *bdev;
 394 
 395         if (!zram->backing_dev)
 396                 return;
 397 
 398         bdev = zram->bdev;
 399         if (zram->old_block_size)
 400                 set_blocksize(bdev, zram->old_block_size);
 401         blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
  402         /* hope filp_close flushes all of the IO */
 403         filp_close(zram->backing_dev, NULL);
 404         zram->backing_dev = NULL;
 405         zram->old_block_size = 0;
 406         zram->bdev = NULL;
 407         zram->disk->queue->backing_dev_info->capabilities |=
 408                                 BDI_CAP_SYNCHRONOUS_IO;
 409         kvfree(zram->bitmap);
 410         zram->bitmap = NULL;
 411 }
 412 
 413 static ssize_t backing_dev_show(struct device *dev,
 414                 struct device_attribute *attr, char *buf)
 415 {
 416         struct file *file;
 417         struct zram *zram = dev_to_zram(dev);
 418         char *p;
 419         ssize_t ret;
 420 
 421         down_read(&zram->init_lock);
 422         file = zram->backing_dev;
 423         if (!file) {
 424                 memcpy(buf, "none\n", 5);
 425                 up_read(&zram->init_lock);
 426                 return 5;
 427         }
 428 
 429         p = file_path(file, buf, PAGE_SIZE - 1);
 430         if (IS_ERR(p)) {
 431                 ret = PTR_ERR(p);
 432                 goto out;
 433         }
 434 
 435         ret = strlen(p);
 436         memmove(buf, p, ret);
 437         buf[ret++] = '\n';
 438 out:
 439         up_read(&zram->init_lock);
 440         return ret;
 441 }
 442 
 443 static ssize_t backing_dev_store(struct device *dev,
 444                 struct device_attribute *attr, const char *buf, size_t len)
 445 {
 446         char *file_name;
 447         size_t sz;
 448         struct file *backing_dev = NULL;
 449         struct inode *inode;
 450         struct address_space *mapping;
 451         unsigned int bitmap_sz, old_block_size = 0;
 452         unsigned long nr_pages, *bitmap = NULL;
 453         struct block_device *bdev = NULL;
 454         int err;
 455         struct zram *zram = dev_to_zram(dev);
 456 
 457         file_name = kmalloc(PATH_MAX, GFP_KERNEL);
 458         if (!file_name)
 459                 return -ENOMEM;
 460 
 461         down_write(&zram->init_lock);
 462         if (init_done(zram)) {
 463                 pr_info("Can't setup backing device for initialized device\n");
 464                 err = -EBUSY;
 465                 goto out;
 466         }
 467 
 468         strlcpy(file_name, buf, PATH_MAX);
 469         /* ignore trailing newline */
 470         sz = strlen(file_name);
 471         if (sz > 0 && file_name[sz - 1] == '\n')
 472                 file_name[sz - 1] = 0x00;
 473 
 474         backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
 475         if (IS_ERR(backing_dev)) {
 476                 err = PTR_ERR(backing_dev);
 477                 backing_dev = NULL;
 478                 goto out;
 479         }
 480 
 481         mapping = backing_dev->f_mapping;
 482         inode = mapping->host;
 483 
  484         /* Only block devices are supported at the moment */
 485         if (!S_ISBLK(inode->i_mode)) {
 486                 err = -ENOTBLK;
 487                 goto out;
 488         }
 489 
 490         bdev = bdgrab(I_BDEV(inode));
 491         err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
 492         if (err < 0) {
 493                 bdev = NULL;
 494                 goto out;
 495         }
 496 
 497         nr_pages = i_size_read(inode) >> PAGE_SHIFT;
 498         bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
 499         bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
 500         if (!bitmap) {
 501                 err = -ENOMEM;
 502                 goto out;
 503         }
 504 
 505         old_block_size = block_size(bdev);
 506         err = set_blocksize(bdev, PAGE_SIZE);
 507         if (err)
 508                 goto out;
 509 
 510         reset_bdev(zram);
 511 
 512         zram->old_block_size = old_block_size;
 513         zram->bdev = bdev;
 514         zram->backing_dev = backing_dev;
 515         zram->bitmap = bitmap;
 516         zram->nr_pages = nr_pages;
 517         /*
  518          * With the writeback feature, zram does asynchronous IO, so it is
  519          * no longer a synchronous device; drop the synchronous IO flag.
  520          * Otherwise, an upper layer (e.g., swap) could wait for IO
  521          * completion rather than submit-and-return, making the system
  522          * sluggish. Furthermore, when the IO function returns (e.g.,
  523          * swap_readpage), the upper layer expects the IO to be done and
  524          * may free the page while the IO is actually still in flight,
  525          * which ends in a use-after-free once the IO really completes.
 526          */
 527         zram->disk->queue->backing_dev_info->capabilities &=
 528                         ~BDI_CAP_SYNCHRONOUS_IO;
 529         up_write(&zram->init_lock);
 530 
 531         pr_info("setup backing device %s\n", file_name);
 532         kfree(file_name);
 533 
 534         return len;
 535 out:
 536         if (bitmap)
 537                 kvfree(bitmap);
 538 
 539         if (bdev)
 540                 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 541 
 542         if (backing_dev)
 543                 filp_close(backing_dev, NULL);
 544 
 545         up_write(&zram->init_lock);
 546 
 547         kfree(file_name);
 548 
 549         return err;
 550 }
 551 
 552 static unsigned long alloc_block_bdev(struct zram *zram)
 553 {
 554         unsigned long blk_idx = 1;
 555 retry:
  556         /* skip bit 0 to avoid confusion with zram.handle == 0 */
 557         blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
 558         if (blk_idx == zram->nr_pages)
 559                 return 0;
 560 
 561         if (test_and_set_bit(blk_idx, zram->bitmap))
 562                 goto retry;
 563 
 564         atomic64_inc(&zram->stats.bd_count);
 565         return blk_idx;
 566 }
 567 
 568 static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
 569 {
 570         int was_set;
 571 
 572         was_set = test_and_clear_bit(blk_idx, zram->bitmap);
 573         WARN_ON_ONCE(!was_set);
 574         atomic64_dec(&zram->stats.bd_count);
 575 }
 576 
 577 static void zram_page_end_io(struct bio *bio)
 578 {
 579         struct page *page = bio_first_page_all(bio);
 580 
 581         page_endio(page, op_is_write(bio_op(bio)),
 582                         blk_status_to_errno(bio->bi_status));
 583         bio_put(bio);
 584 }
 585 
 586 /*
 587  * Returns 1 if the submission is successful.
 588  */
 589 static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
 590                         unsigned long entry, struct bio *parent)
 591 {
 592         struct bio *bio;
 593 
 594         bio = bio_alloc(GFP_ATOMIC, 1);
 595         if (!bio)
 596                 return -ENOMEM;
 597 
 598         bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
 599         bio_set_dev(bio, zram->bdev);
 600         if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
 601                 bio_put(bio);
 602                 return -EIO;
 603         }
 604 
 605         if (!parent) {
 606                 bio->bi_opf = REQ_OP_READ;
 607                 bio->bi_end_io = zram_page_end_io;
 608         } else {
 609                 bio->bi_opf = parent->bi_opf;
 610                 bio_chain(bio, parent);
 611         }
 612 
 613         submit_bio(bio);
 614         return 1;
 615 }
 616 
 617 #define HUGE_WRITEBACK 1
 618 #define IDLE_WRITEBACK 2
 619 
 620 static ssize_t writeback_store(struct device *dev,
 621                 struct device_attribute *attr, const char *buf, size_t len)
 622 {
 623         struct zram *zram = dev_to_zram(dev);
 624         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
 625         unsigned long index;
 626         struct bio bio;
 627         struct bio_vec bio_vec;
 628         struct page *page;
 629         ssize_t ret = len;
 630         int mode;
 631         unsigned long blk_idx = 0;
 632 
 633         if (sysfs_streq(buf, "idle"))
 634                 mode = IDLE_WRITEBACK;
 635         else if (sysfs_streq(buf, "huge"))
 636                 mode = HUGE_WRITEBACK;
 637         else
 638                 return -EINVAL;
 639 
 640         down_read(&zram->init_lock);
 641         if (!init_done(zram)) {
 642                 ret = -EINVAL;
 643                 goto release_init_lock;
 644         }
 645 
 646         if (!zram->backing_dev) {
 647                 ret = -ENODEV;
 648                 goto release_init_lock;
 649         }
 650 
 651         page = alloc_page(GFP_KERNEL);
 652         if (!page) {
 653                 ret = -ENOMEM;
 654                 goto release_init_lock;
 655         }
 656 
 657         for (index = 0; index < nr_pages; index++) {
 658                 struct bio_vec bvec;
 659 
 660                 bvec.bv_page = page;
 661                 bvec.bv_len = PAGE_SIZE;
 662                 bvec.bv_offset = 0;
 663 
 664                 spin_lock(&zram->wb_limit_lock);
 665                 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
 666                         spin_unlock(&zram->wb_limit_lock);
 667                         ret = -EIO;
 668                         break;
 669                 }
 670                 spin_unlock(&zram->wb_limit_lock);
 671 
 672                 if (!blk_idx) {
 673                         blk_idx = alloc_block_bdev(zram);
 674                         if (!blk_idx) {
 675                                 ret = -ENOSPC;
 676                                 break;
 677                         }
 678                 }
 679 
 680                 zram_slot_lock(zram, index);
 681                 if (!zram_allocated(zram, index))
 682                         goto next;
 683 
 684                 if (zram_test_flag(zram, index, ZRAM_WB) ||
 685                                 zram_test_flag(zram, index, ZRAM_SAME) ||
 686                                 zram_test_flag(zram, index, ZRAM_UNDER_WB))
 687                         goto next;
 688 
 689                 if (mode == IDLE_WRITEBACK &&
 690                           !zram_test_flag(zram, index, ZRAM_IDLE))
 691                         goto next;
 692                 if (mode == HUGE_WRITEBACK &&
 693                           !zram_test_flag(zram, index, ZRAM_HUGE))
 694                         goto next;
 695                 /*
  696                  * Clearing ZRAM_UNDER_WB is the caller's duty.
  697                  * IOW, zram_free_page never clears it.
 698                  */
 699                 zram_set_flag(zram, index, ZRAM_UNDER_WB);
  700                 /* Needed for the hugepage writeback race check below */
 701                 zram_set_flag(zram, index, ZRAM_IDLE);
 702                 zram_slot_unlock(zram, index);
 703                 if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
 704                         zram_slot_lock(zram, index);
 705                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 706                         zram_clear_flag(zram, index, ZRAM_IDLE);
 707                         zram_slot_unlock(zram, index);
 708                         continue;
 709                 }
 710 
 711                 bio_init(&bio, &bio_vec, 1);
 712                 bio_set_dev(&bio, zram->bdev);
 713                 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
 714                 bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
 715 
 716                 bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
 717                                 bvec.bv_offset);
 718                 /*
 719                  * XXX: A single page IO would be inefficient for write
  720                  * but it is not bad as a starting point.
 721                  */
 722                 ret = submit_bio_wait(&bio);
 723                 if (ret) {
 724                         zram_slot_lock(zram, index);
 725                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 726                         zram_clear_flag(zram, index, ZRAM_IDLE);
 727                         zram_slot_unlock(zram, index);
 728                         continue;
 729                 }
 730 
 731                 atomic64_inc(&zram->stats.bd_writes);
 732                 /*
  733                  * We released zram_slot_lock, so we need to check whether
  734                  * the slot has changed. If the slot was freed, zram_allocated
  735                  * catches that easily.
  736                  * A subtle case is the slot being freed, reallocated and
  737                  * marked ZRAM_IDLE again. To close that race, idle_store does
  738                  * not mark a slot ZRAM_IDLE once it finds it is ZRAM_UNDER_WB.
  739                  * Thus, the race can be closed by checking the ZRAM_IDLE bit.
 740                  */
 741                 zram_slot_lock(zram, index);
 742                 if (!zram_allocated(zram, index) ||
 743                           !zram_test_flag(zram, index, ZRAM_IDLE)) {
 744                         zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 745                         zram_clear_flag(zram, index, ZRAM_IDLE);
 746                         goto next;
 747                 }
 748 
 749                 zram_free_page(zram, index);
 750                 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 751                 zram_set_flag(zram, index, ZRAM_WB);
 752                 zram_set_element(zram, index, blk_idx);
 753                 blk_idx = 0;
 754                 atomic64_inc(&zram->stats.pages_stored);
 755                 spin_lock(&zram->wb_limit_lock);
 756                 if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
 757                         zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
 758                 spin_unlock(&zram->wb_limit_lock);
 759 next:
 760                 zram_slot_unlock(zram, index);
 761         }
 762 
 763         if (blk_idx)
 764                 free_block_bdev(zram, blk_idx);
 765         __free_page(page);
 766 release_init_lock:
 767         up_read(&zram->init_lock);
 768 
 769         return ret;
 770 }
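
/*
 * writeback_store() is the sysfs handler behind the "writeback" attribute.
 * A minimal user-space sketch of the surrounding flow, assuming
 * CONFIG_ZRAM_WRITEBACK, an uninitialized zram0 and /dev/loop0 as a
 * placeholder backing device:
 */

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* The backing device must be configured before disksize (init_done). */
	if (write_str("/sys/block/zram0/backing_dev", "/dev/loop0") ||
	    write_str("/sys/block/zram0/disksize", "1G"))
		return 1;
	/* ... the device is used for a while ... */
	write_str("/sys/block/zram0/idle", "all");       /* mark slots idle */
	write_str("/sys/block/zram0/writeback", "idle"); /* write them back */
	return 0;
}
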
 771 
 772 struct zram_work {
 773         struct work_struct work;
 774         struct zram *zram;
 775         unsigned long entry;
 776         struct bio *bio;
 777         struct bio_vec bvec;
 778 };
 779 
 780 #if PAGE_SIZE != 4096
 781 static void zram_sync_read(struct work_struct *work)
 782 {
 783         struct zram_work *zw = container_of(work, struct zram_work, work);
 784         struct zram *zram = zw->zram;
 785         unsigned long entry = zw->entry;
 786         struct bio *bio = zw->bio;
 787 
 788         read_from_bdev_async(zram, &zw->bvec, entry, bio);
 789 }
 790 
 791 /*
  792  * The block layer wants one ->make_request_fn to be active at a time,
  793  * so if we use chained IO with the parent IO in the same context,
  794  * it deadlocks. To avoid that, the read is done from a worker thread.
 795  */
 796 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
 797                                 unsigned long entry, struct bio *bio)
 798 {
 799         struct zram_work work;
 800 
 801         work.bvec = *bvec;
 802         work.zram = zram;
 803         work.entry = entry;
 804         work.bio = bio;
 805 
 806         INIT_WORK_ONSTACK(&work.work, zram_sync_read);
 807         queue_work(system_unbound_wq, &work.work);
 808         flush_work(&work.work);
 809         destroy_work_on_stack(&work.work);
 810 
 811         return 1;
 812 }
 813 #else
 814 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
 815                                 unsigned long entry, struct bio *bio)
 816 {
 817         WARN_ON(1);
 818         return -EIO;
 819 }
 820 #endif
 821 
 822 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
 823                         unsigned long entry, struct bio *parent, bool sync)
 824 {
 825         atomic64_inc(&zram->stats.bd_reads);
 826         if (sync)
 827                 return read_from_bdev_sync(zram, bvec, entry, parent);
 828         else
 829                 return read_from_bdev_async(zram, bvec, entry, parent);
 830 }
 831 #else
 832 static inline void reset_bdev(struct zram *zram) {};
 833 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
 834                         unsigned long entry, struct bio *parent, bool sync)
 835 {
 836         return -EIO;
 837 }
 838 
 839 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
 840 #endif
 841 
 842 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
 843 
 844 static struct dentry *zram_debugfs_root;
 845 
 846 static void zram_debugfs_create(void)
 847 {
 848         zram_debugfs_root = debugfs_create_dir("zram", NULL);
 849 }
 850 
 851 static void zram_debugfs_destroy(void)
 852 {
 853         debugfs_remove_recursive(zram_debugfs_root);
 854 }
 855 
 856 static void zram_accessed(struct zram *zram, u32 index)
 857 {
 858         zram_clear_flag(zram, index, ZRAM_IDLE);
 859         zram->table[index].ac_time = ktime_get_boottime();
 860 }
 861 
 862 static ssize_t read_block_state(struct file *file, char __user *buf,
 863                                 size_t count, loff_t *ppos)
 864 {
 865         char *kbuf;
 866         ssize_t index, written = 0;
 867         struct zram *zram = file->private_data;
 868         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
 869         struct timespec64 ts;
 870 
 871         kbuf = kvmalloc(count, GFP_KERNEL);
 872         if (!kbuf)
 873                 return -ENOMEM;
 874 
 875         down_read(&zram->init_lock);
 876         if (!init_done(zram)) {
 877                 up_read(&zram->init_lock);
 878                 kvfree(kbuf);
 879                 return -EINVAL;
 880         }
 881 
 882         for (index = *ppos; index < nr_pages; index++) {
 883                 int copied;
 884 
 885                 zram_slot_lock(zram, index);
 886                 if (!zram_allocated(zram, index))
 887                         goto next;
 888 
 889                 ts = ktime_to_timespec64(zram->table[index].ac_time);
 890                 copied = snprintf(kbuf + written, count,
 891                         "%12zd %12lld.%06lu %c%c%c%c\n",
 892                         index, (s64)ts.tv_sec,
 893                         ts.tv_nsec / NSEC_PER_USEC,
 894                         zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
 895                         zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
 896                         zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
 897                         zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
 898 
 899                 if (count < copied) {
 900                         zram_slot_unlock(zram, index);
 901                         break;
 902                 }
 903                 written += copied;
 904                 count -= copied;
 905 next:
 906                 zram_slot_unlock(zram, index);
 907                 *ppos += 1;
 908         }
 909 
 910         up_read(&zram->init_lock);
 911         if (copy_to_user(buf, kbuf, written))
 912                 written = -EFAULT;
 913         kvfree(kbuf);
 914 
 915         return written;
 916 }
 917 
 918 static const struct file_operations proc_zram_block_state_op = {
 919         .open = simple_open,
 920         .read = read_block_state,
 921         .llseek = default_llseek,
 922 };
 923 
 924 static void zram_debugfs_register(struct zram *zram)
 925 {
 926         if (!zram_debugfs_root)
 927                 return;
 928 
 929         zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
 930                                                 zram_debugfs_root);
 931         debugfs_create_file("block_state", 0400, zram->debugfs_dir,
 932                                 zram, &proc_zram_block_state_op);
 933 }
 934 
 935 static void zram_debugfs_unregister(struct zram *zram)
 936 {
 937         debugfs_remove_recursive(zram->debugfs_dir);
 938 }
 939 #else
 940 static void zram_debugfs_create(void) {};
 941 static void zram_debugfs_destroy(void) {};
 942 static void zram_accessed(struct zram *zram, u32 index)
 943 {
 944         zram_clear_flag(zram, index, ZRAM_IDLE);
 945 };
 946 static void zram_debugfs_register(struct zram *zram) {};
 947 static void zram_debugfs_unregister(struct zram *zram) {};
 948 #endif
 949 
 950 /*
 951  * We switched to per-cpu streams and this attr is not needed anymore.
 952  * However, we will keep it around for some time, because:
 953  * a) we may revert per-cpu streams in the future
  954  * b) it's visible to user space and we need to follow our 2-year
  955  *    retirement rule; but we already have a number of 'soon to be
  956  *    altered' attrs, so max_comp_streams needs to wait for the next
 957  *    layoff cycle.
 958  */
 959 static ssize_t max_comp_streams_show(struct device *dev,
 960                 struct device_attribute *attr, char *buf)
 961 {
 962         return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
 963 }
 964 
 965 static ssize_t max_comp_streams_store(struct device *dev,
 966                 struct device_attribute *attr, const char *buf, size_t len)
 967 {
 968         return len;
 969 }
 970 
 971 static ssize_t comp_algorithm_show(struct device *dev,
 972                 struct device_attribute *attr, char *buf)
 973 {
 974         size_t sz;
 975         struct zram *zram = dev_to_zram(dev);
 976 
 977         down_read(&zram->init_lock);
 978         sz = zcomp_available_show(zram->compressor, buf);
 979         up_read(&zram->init_lock);
 980 
 981         return sz;
 982 }
 983 
 984 static ssize_t comp_algorithm_store(struct device *dev,
 985                 struct device_attribute *attr, const char *buf, size_t len)
 986 {
 987         struct zram *zram = dev_to_zram(dev);
 988         char compressor[ARRAY_SIZE(zram->compressor)];
 989         size_t sz;
 990 
 991         strlcpy(compressor, buf, sizeof(compressor));
 992         /* ignore trailing newline */
 993         sz = strlen(compressor);
 994         if (sz > 0 && compressor[sz - 1] == '\n')
 995                 compressor[sz - 1] = 0x00;
 996 
 997         if (!zcomp_available_algorithm(compressor))
 998                 return -EINVAL;
 999 
1000         down_write(&zram->init_lock);
1001         if (init_done(zram)) {
1002                 up_write(&zram->init_lock);
1003                 pr_info("Can't change algorithm for initialized device\n");
1004                 return -EBUSY;
1005         }
1006 
1007         strcpy(zram->compressor, compressor);
1008         up_write(&zram->init_lock);
1009         return len;
1010 }
1011 
1012 static ssize_t compact_store(struct device *dev,
1013                 struct device_attribute *attr, const char *buf, size_t len)
1014 {
1015         struct zram *zram = dev_to_zram(dev);
1016 
1017         down_read(&zram->init_lock);
1018         if (!init_done(zram)) {
1019                 up_read(&zram->init_lock);
1020                 return -EINVAL;
1021         }
1022 
1023         zs_compact(zram->mem_pool);
1024         up_read(&zram->init_lock);
1025 
1026         return len;
1027 }
1028 
1029 static ssize_t io_stat_show(struct device *dev,
1030                 struct device_attribute *attr, char *buf)
1031 {
1032         struct zram *zram = dev_to_zram(dev);
1033         ssize_t ret;
1034 
1035         down_read(&zram->init_lock);
1036         ret = scnprintf(buf, PAGE_SIZE,
1037                         "%8llu %8llu %8llu %8llu\n",
1038                         (u64)atomic64_read(&zram->stats.failed_reads),
1039                         (u64)atomic64_read(&zram->stats.failed_writes),
1040                         (u64)atomic64_read(&zram->stats.invalid_io),
1041                         (u64)atomic64_read(&zram->stats.notify_free));
1042         up_read(&zram->init_lock);
1043 
1044         return ret;
1045 }
1046 
1047 static ssize_t mm_stat_show(struct device *dev,
1048                 struct device_attribute *attr, char *buf)
1049 {
1050         struct zram *zram = dev_to_zram(dev);
1051         struct zs_pool_stats pool_stats;
1052         u64 orig_size, mem_used = 0;
1053         long max_used;
1054         ssize_t ret;
1055 
1056         memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1057 
1058         down_read(&zram->init_lock);
1059         if (init_done(zram)) {
1060                 mem_used = zs_get_total_pages(zram->mem_pool);
1061                 zs_pool_stats(zram->mem_pool, &pool_stats);
1062         }
1063 
1064         orig_size = atomic64_read(&zram->stats.pages_stored);
1065         max_used = atomic_long_read(&zram->stats.max_used_pages);
1066 
1067         ret = scnprintf(buf, PAGE_SIZE,
1068                         "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
1069                         orig_size << PAGE_SHIFT,
1070                         (u64)atomic64_read(&zram->stats.compr_data_size),
1071                         mem_used << PAGE_SHIFT,
1072                         zram->limit_pages << PAGE_SHIFT,
1073                         max_used << PAGE_SHIFT,
1074                         (u64)atomic64_read(&zram->stats.same_pages),
1075                         pool_stats.pages_compacted,
1076                         (u64)atomic64_read(&zram->stats.huge_pages));
1077         up_read(&zram->init_lock);
1078 
1079         return ret;
1080 }
1081 
1082 #ifdef CONFIG_ZRAM_WRITEBACK
1083 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1084 static ssize_t bd_stat_show(struct device *dev,
1085                 struct device_attribute *attr, char *buf)
1086 {
1087         struct zram *zram = dev_to_zram(dev);
1088         ssize_t ret;
1089 
1090         down_read(&zram->init_lock);
1091         ret = scnprintf(buf, PAGE_SIZE,
1092                 "%8llu %8llu %8llu\n",
1093                         FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1094                         FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1095                         FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1096         up_read(&zram->init_lock);
1097 
1098         return ret;
1099 }
1100 #endif
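
/*
 * bd_stat reports the backing-device counters rescaled by FOUR_K(), so the
 * values are in 4 KiB units regardless of PAGE_SIZE.  A minimal stand-alone
 * sketch of that rescaling, with page_shift made an explicit parameter for
 * illustration:
 */

#include <stdio.h>

/* Same rescaling as FOUR_K(), parameterised on page_shift. */
static unsigned long long demo_four_k(unsigned long long x, int page_shift)
{
	return x * (1ULL << (page_shift - 12));
}

int main(void)
{
	/* 100 backing-device blocks, reported in 4K units. */
	printf("%llu\n", demo_four_k(100, 12));  /* 4K pages:  100 */
	printf("%llu\n", demo_four_k(100, 16));  /* 64K pages: 1600 */
	return 0;
}
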
1101 
1102 static ssize_t debug_stat_show(struct device *dev,
1103                 struct device_attribute *attr, char *buf)
1104 {
1105         int version = 1;
1106         struct zram *zram = dev_to_zram(dev);
1107         ssize_t ret;
1108 
1109         down_read(&zram->init_lock);
1110         ret = scnprintf(buf, PAGE_SIZE,
1111                         "version: %d\n%8llu %8llu\n",
1112                         version,
1113                         (u64)atomic64_read(&zram->stats.writestall),
1114                         (u64)atomic64_read(&zram->stats.miss_free));
1115         up_read(&zram->init_lock);
1116 
1117         return ret;
1118 }
1119 
1120 static DEVICE_ATTR_RO(io_stat);
1121 static DEVICE_ATTR_RO(mm_stat);
1122 #ifdef CONFIG_ZRAM_WRITEBACK
1123 static DEVICE_ATTR_RO(bd_stat);
1124 #endif
1125 static DEVICE_ATTR_RO(debug_stat);
1126 
1127 static void zram_meta_free(struct zram *zram, u64 disksize)
1128 {
1129         size_t num_pages = disksize >> PAGE_SHIFT;
1130         size_t index;
1131 
1132         /* Free all pages that are still in this zram device */
1133         for (index = 0; index < num_pages; index++)
1134                 zram_free_page(zram, index);
1135 
1136         zs_destroy_pool(zram->mem_pool);
1137         vfree(zram->table);
1138 }
1139 
1140 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1141 {
1142         size_t num_pages;
1143 
1144         num_pages = disksize >> PAGE_SHIFT;
1145         zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1146         if (!zram->table)
1147                 return false;
1148 
1149         zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1150         if (!zram->mem_pool) {
1151                 vfree(zram->table);
1152                 return false;
1153         }
1154 
1155         if (!huge_class_size)
1156                 huge_class_size = zs_huge_class_size(zram->mem_pool);
1157         return true;
1158 }
1159 
1160 /*
1161  * To protect concurrent access to the same index entry,
 1162  * the caller should hold this table index entry's bit_spinlock to
 1163  * indicate this index entry is being accessed.
1164  */
1165 static void zram_free_page(struct zram *zram, size_t index)
1166 {
1167         unsigned long handle;
1168 
1169 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
1170         zram->table[index].ac_time = 0;
1171 #endif
1172         if (zram_test_flag(zram, index, ZRAM_IDLE))
1173                 zram_clear_flag(zram, index, ZRAM_IDLE);
1174 
1175         if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1176                 zram_clear_flag(zram, index, ZRAM_HUGE);
1177                 atomic64_dec(&zram->stats.huge_pages);
1178         }
1179 
1180         if (zram_test_flag(zram, index, ZRAM_WB)) {
1181                 zram_clear_flag(zram, index, ZRAM_WB);
1182                 free_block_bdev(zram, zram_get_element(zram, index));
1183                 goto out;
1184         }
1185 
1186         /*
 1187          * No memory is allocated for same-element-filled pages.
 1188          * Simply clear the same-page flag.
1189          */
1190         if (zram_test_flag(zram, index, ZRAM_SAME)) {
1191                 zram_clear_flag(zram, index, ZRAM_SAME);
1192                 atomic64_dec(&zram->stats.same_pages);
1193                 goto out;
1194         }
1195 
1196         handle = zram_get_handle(zram, index);
1197         if (!handle)
1198                 return;
1199 
1200         zs_free(zram->mem_pool, handle);
1201 
1202         atomic64_sub(zram_get_obj_size(zram, index),
1203                         &zram->stats.compr_data_size);
1204 out:
1205         atomic64_dec(&zram->stats.pages_stored);
1206         zram_set_handle(zram, index, 0);
1207         zram_set_obj_size(zram, index, 0);
1208         WARN_ON_ONCE(zram->table[index].flags &
1209                 ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
1210 }
1211 
1212 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
1213                                 struct bio *bio, bool partial_io)
1214 {
1215         int ret;
1216         unsigned long handle;
1217         unsigned int size;
1218         void *src, *dst;
1219 
1220         zram_slot_lock(zram, index);
1221         if (zram_test_flag(zram, index, ZRAM_WB)) {
1222                 struct bio_vec bvec;
1223 
1224                 zram_slot_unlock(zram, index);
1225 
1226                 bvec.bv_page = page;
1227                 bvec.bv_len = PAGE_SIZE;
1228                 bvec.bv_offset = 0;
1229                 return read_from_bdev(zram, &bvec,
1230                                 zram_get_element(zram, index),
1231                                 bio, partial_io);
1232         }
1233 
1234         handle = zram_get_handle(zram, index);
1235         if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
1236                 unsigned long value;
1237                 void *mem;
1238 
1239                 value = handle ? zram_get_element(zram, index) : 0;
1240                 mem = kmap_atomic(page);
1241                 zram_fill_page(mem, PAGE_SIZE, value);
1242                 kunmap_atomic(mem);
1243                 zram_slot_unlock(zram, index);
1244                 return 0;
1245         }
1246 
1247         size = zram_get_obj_size(zram, index);
1248 
1249         src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
1250         if (size == PAGE_SIZE) {
1251                 dst = kmap_atomic(page);
1252                 memcpy(dst, src, PAGE_SIZE);
1253                 kunmap_atomic(dst);
1254                 ret = 0;
1255         } else {
1256                 struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
1257 
1258                 dst = kmap_atomic(page);
1259                 ret = zcomp_decompress(zstrm, src, size, dst);
1260                 kunmap_atomic(dst);
1261                 zcomp_stream_put(zram->comp);
1262         }
1263         zs_unmap_object(zram->mem_pool, handle);
1264         zram_slot_unlock(zram, index);
1265 
1266         /* Should NEVER happen. Return bio error if it does. */
1267         if (unlikely(ret))
1268                 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
1269 
1270         return ret;
1271 }
1272 
1273 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
1274                                 u32 index, int offset, struct bio *bio)
1275 {
1276         int ret;
1277         struct page *page;
1278 
1279         page = bvec->bv_page;
1280         if (is_partial_io(bvec)) {
1281                 /* Use a temporary buffer to decompress the page */
1282                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1283                 if (!page)
1284                         return -ENOMEM;
1285         }
1286 
1287         ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
1288         if (unlikely(ret))
1289                 goto out;
1290 
1291         if (is_partial_io(bvec)) {
1292                 void *dst = kmap_atomic(bvec->bv_page);
1293                 void *src = kmap_atomic(page);
1294 
1295                 memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
1296                 kunmap_atomic(src);
1297                 kunmap_atomic(dst);
1298         }
1299 out:
1300         if (is_partial_io(bvec))
1301                 __free_page(page);
1302 
1303         return ret;
1304 }
1305 
1306 static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1307                                 u32 index, struct bio *bio)
1308 {
1309         int ret = 0;
1310         unsigned long alloced_pages;
1311         unsigned long handle = 0;
1312         unsigned int comp_len = 0;
1313         void *src, *dst, *mem;
1314         struct zcomp_strm *zstrm;
1315         struct page *page = bvec->bv_page;
1316         unsigned long element = 0;
1317         enum zram_pageflags flags = 0;
1318 
1319         mem = kmap_atomic(page);
1320         if (page_same_filled(mem, &element)) {
1321                 kunmap_atomic(mem);
1322                 /* Free memory associated with this sector now. */
1323                 flags = ZRAM_SAME;
1324                 atomic64_inc(&zram->stats.same_pages);
1325                 goto out;
1326         }
1327         kunmap_atomic(mem);
1328 
1329 compress_again:
1330         zstrm = zcomp_stream_get(zram->comp);
1331         src = kmap_atomic(page);
1332         ret = zcomp_compress(zstrm, src, &comp_len);
1333         kunmap_atomic(src);
1334 
1335         if (unlikely(ret)) {
1336                 zcomp_stream_put(zram->comp);
1337                 pr_err("Compression failed! err=%d\n", ret);
1338                 zs_free(zram->mem_pool, handle);
1339                 return ret;
1340         }
1341 
1342         if (comp_len >= huge_class_size)
1343                 comp_len = PAGE_SIZE;
1344         /*
1345          * handle allocation has 2 paths:
1346          * a) fast path is executed with preemption disabled (for
1347          *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1348          *  since we can't sleep;
1349          * b) slow path enables preemption and attempts to allocate
 1350          *  the page with __GFP_DIRECT_RECLAIM bit set. We have to
 1351          *  put the per-cpu compression stream and, thus, re-do
 1352          *  the compression once the handle is allocated.
 1353          *
 1354          * If we have a non-NULL handle here then we are coming
 1355          * from the slow path and the handle has already been allocated.
1356          */
1357         if (!handle)
1358                 handle = zs_malloc(zram->mem_pool, comp_len,
1359                                 __GFP_KSWAPD_RECLAIM |
1360                                 __GFP_NOWARN |
1361                                 __GFP_HIGHMEM |
1362                                 __GFP_MOVABLE);
1363         if (!handle) {
1364                 zcomp_stream_put(zram->comp);
1365                 atomic64_inc(&zram->stats.writestall);
1366                 handle = zs_malloc(zram->mem_pool, comp_len,
1367                                 GFP_NOIO | __GFP_HIGHMEM |
1368                                 __GFP_MOVABLE);
1369                 if (handle)
1370                         goto compress_again;
1371                 return -ENOMEM;
1372         }
1373 
1374         alloced_pages = zs_get_total_pages(zram->mem_pool);
1375         update_used_max(zram, alloced_pages);
1376 
1377         if (zram->limit_pages && alloced_pages > zram->limit_pages) {
1378                 zcomp_stream_put(zram->comp);
1379                 zs_free(zram->mem_pool, handle);
1380                 return -ENOMEM;
1381         }
1382 
1383         dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
1384 
1385         src = zstrm->buffer;
1386         if (comp_len == PAGE_SIZE)
1387                 src = kmap_atomic(page);
1388         memcpy(dst, src, comp_len);
1389         if (comp_len == PAGE_SIZE)
1390                 kunmap_atomic(src);
1391 
1392         zcomp_stream_put(zram->comp);
1393         zs_unmap_object(zram->mem_pool, handle);
1394         atomic64_add(comp_len, &zram->stats.compr_data_size);
1395 out:
1396         /*
1397          * Free memory associated with this sector
1398          * before overwriting unused sectors.
1399          */
1400         zram_slot_lock(zram, index);
1401         zram_free_page(zram, index);
1402 
1403         if (comp_len == PAGE_SIZE) {
1404                 zram_set_flag(zram, index, ZRAM_HUGE);
1405                 atomic64_inc(&zram->stats.huge_pages);
1406         }
1407 
1408         if (flags) {
1409                 zram_set_flag(zram, index, flags);
1410                 zram_set_element(zram, index, element);
1411         }  else {
1412                 zram_set_handle(zram, index, handle);
1413                 zram_set_obj_size(zram, index, comp_len);
1414         }
1415         zram_slot_unlock(zram, index);
1416 
1417         /* Update stats */
1418         atomic64_inc(&zram->stats.pages_stored);
1419         return ret;
1420 }
1421 
1422 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1423                                 u32 index, int offset, struct bio *bio)
1424 {
1425         int ret;
1426         struct page *page = NULL;
1427         void *src;
1428         struct bio_vec vec;
1429 
1430         vec = *bvec;
1431         if (is_partial_io(bvec)) {
1432                 void *dst;
1433                 /*
1434                  * This is a partial IO. We need to read the full page
 1435                  * before writing the changes.
1436                  */
1437                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1438                 if (!page)
1439                         return -ENOMEM;
1440 
1441                 ret = __zram_bvec_read(zram, page, index, bio, true);
1442                 if (ret)
1443                         goto out;
1444 
1445                 src = kmap_atomic(bvec->bv_page);
1446                 dst = kmap_atomic(page);
1447                 memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
1448                 kunmap_atomic(dst);
1449                 kunmap_atomic(src);
1450 
1451                 vec.bv_page = page;
1452                 vec.bv_len = PAGE_SIZE;
1453                 vec.bv_offset = 0;
1454         }
1455 
1456         ret = __zram_bvec_write(zram, &vec, index, bio);
1457 out:
1458         if (is_partial_io(bvec))
1459                 __free_page(page);
1460         return ret;
1461 }
1462 
1463 /*
1464  * zram_bio_discard - handler on discard request
1465  * @index: physical block index in PAGE_SIZE units
1466  * @offset: byte offset within physical block
1467  */
1468 static void zram_bio_discard(struct zram *zram, u32 index,
1469                              int offset, struct bio *bio)
1470 {
1471         size_t n = bio->bi_iter.bi_size;
1472 
1473         /*
 1474          * zram manages data in physical block size units. Because the logical
 1475          * block size isn't identical to the physical block size on some
 1476          * architectures, we could get a discard request pointing to a specific
 1477          * offset within a certain physical block.  Although we could handle
 1478          * such a request by reading that physical block, decompressing,
 1479          * partially zeroing, re-compressing and re-storing it, that isn't
 1480          * reasonable because our intent with a discard request is to save
 1481          * memory.  So skipping this partial logical block is appropriate here.
1482          */
1483         if (offset) {
1484                 if (n <= (PAGE_SIZE - offset))
1485                         return;
1486 
1487                 n -= (PAGE_SIZE - offset);
1488                 index++;
1489         }
1490 
1491         while (n >= PAGE_SIZE) {
1492                 zram_slot_lock(zram, index);
1493                 zram_free_page(zram, index);
1494                 zram_slot_unlock(zram, index);
1495                 atomic64_inc(&zram->stats.notify_free);
1496                 index++;
1497                 n -= PAGE_SIZE;
1498         }
1499 }
1500 
1501 /*
 1502  * Returns a negative errno on error. Otherwise returns 0 or 1.
 1503  * Returns 0 if the IO request was completed synchronously.
 1504  * Returns 1 if the IO request was successfully submitted (asynchronously).
1505  */
1506 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
1507                         int offset, unsigned int op, struct bio *bio)
1508 {
1509         unsigned long start_time = jiffies;
1510         struct request_queue *q = zram->disk->queue;
1511         int ret;
1512 
1513         generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT,
1514                         &zram->disk->part0);
1515 
1516         if (!op_is_write(op)) {
1517                 atomic64_inc(&zram->stats.num_reads);
1518                 ret = zram_bvec_read(zram, bvec, index, offset, bio);
1519                 flush_dcache_page(bvec->bv_page);
1520         } else {
1521                 atomic64_inc(&zram->stats.num_writes);
1522                 ret = zram_bvec_write(zram, bvec, index, offset, bio);
1523         }
1524 
1525         generic_end_io_acct(q, op, &zram->disk->part0, start_time);
1526 
1527         zram_slot_lock(zram, index);
1528         zram_accessed(zram, index);
1529         zram_slot_unlock(zram, index);
1530 
1531         if (unlikely(ret < 0)) {
1532                 if (!op_is_write(op))
1533                         atomic64_inc(&zram->stats.failed_reads);
1534                 else
1535                         atomic64_inc(&zram->stats.failed_writes);
1536         }
1537 
1538         return ret;
1539 }
1540 
1541 static void __zram_make_request(struct zram *zram, struct bio *bio)
1542 {
1543         int offset;
1544         u32 index;
1545         struct bio_vec bvec;
1546         struct bvec_iter iter;
1547 
1548         index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1549         offset = (bio->bi_iter.bi_sector &
1550                   (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
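              /*
               * For example (assuming PAGE_SIZE == 4096, i.e. 8 sectors per
               * page): bi_sector == 10 yields index == 1 and
               * offset == (10 & 7) << SECTOR_SHIFT == 1024 bytes into that page.
               */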
1551 
1552         switch (bio_op(bio)) {
1553         case REQ_OP_DISCARD:
1554         case REQ_OP_WRITE_ZEROES:
1555                 zram_bio_discard(zram, index, offset, bio);
1556                 bio_endio(bio);
1557                 return;
1558         default:
1559                 break;
1560         }
1561 
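              /*
               * Walk the bio segment by segment, splitting each bvec at page
               * boundaries so that zram_bvec_rw() never operates across a
               * physical page.
               */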
1562         bio_for_each_segment(bvec, bio, iter) {
1563                 struct bio_vec bv = bvec;
1564                 unsigned int unwritten = bvec.bv_len;
1565 
1566                 do {
1567                         bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
1568                                                         unwritten);
1569                         if (zram_bvec_rw(zram, &bv, index, offset,
1570                                          bio_op(bio), bio) < 0)
1571                                 goto out;
1572 
1573                         bv.bv_offset += bv.bv_len;
1574                         unwritten -= bv.bv_len;
1575 
1576                         update_position(&index, &offset, &bv);
1577                 } while (unwritten);
1578         }
1579 
1580         bio_endio(bio);
1581         return;
1582 
1583 out:
1584         bio_io_error(bio);
1585 }
1586 
1587 /*
1588  * Handler function for all zram I/O requests.
1589  */
1590 static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
1591 {
1592         struct zram *zram = queue->queuedata;
1593 
1594         if (!valid_io_request(zram, bio->bi_iter.bi_sector,
1595                                         bio->bi_iter.bi_size)) {
1596                 atomic64_inc(&zram->stats.invalid_io);
1597                 goto error;
1598         }
1599 
1600         __zram_make_request(zram, bio);
1601         return BLK_QC_T_NONE;
1602 
1603 error:
1604         bio_io_error(bio);
1605         return BLK_QC_T_NONE;
1606 }
1607 
1608 static void zram_slot_free_notify(struct block_device *bdev,
1609                                 unsigned long index)
1610 {
1611         struct zram *zram;
1612 
1613         zram = bdev->bd_disk->private_data;
1614 
1615         atomic64_inc(&zram->stats.notify_free);
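              /*
               * Freeing here is best-effort: if the slot lock cannot be taken
               * immediately, only account the event as miss_free.  The slot
               * will still be freed later, when it is overwritten or the
               * device is reset.
               */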
1616         if (!zram_slot_trylock(zram, index)) {
1617                 atomic64_inc(&zram->stats.miss_free);
1618                 return;
1619         }
1620 
1621         zram_free_page(zram, index);
1622         zram_slot_unlock(zram, index);
1623 }
1624 
1625 static int zram_rw_page(struct block_device *bdev, sector_t sector,
1626                        struct page *page, unsigned int op)
1627 {
1628         int offset, ret;
1629         u32 index;
1630         struct zram *zram;
1631         struct bio_vec bv;
1632 
1633         if (PageTransHuge(page))
1634                 return -ENOTSUPP;
1635         zram = bdev->bd_disk->private_data;
1636 
1637         if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1638                 atomic64_inc(&zram->stats.invalid_io);
1639                 ret = -EINVAL;
1640                 goto out;
1641         }
1642 
1643         index = sector >> SECTORS_PER_PAGE_SHIFT;
1644         offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1645 
1646         bv.bv_page = page;
1647         bv.bv_len = PAGE_SIZE;
1648         bv.bv_offset = 0;
1649 
1650         ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
1651 out:
1652         /*
1653          * If the I/O fails, just return the error (i.e. non-zero) without
1654          * calling page_endio.
1655          * The callers of rw_page (e.g. swap_readpage, __swap_writepage)
1656          * will then resubmit the I/O as a regular bio request, and
1657          * bio->bi_end_io will take care of the error handling
1658          * (e.g. SetPageError, set_page_dirty and other cleanup).
1659          */
1660         if (unlikely(ret < 0))
1661                 return ret;
1662 
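              /*
               * At this point ret is 0 or 1 (see zram_bvec_rw): 0 means zram
               * served the request synchronously, so complete the page here;
               * 1 means a read was submitted to the backing device and its
               * bio completion callback will call page_endio() for us.
               */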
1663         switch (ret) {
1664         case 0:
1665                 page_endio(page, op_is_write(op), 0);
1666                 break;
1667         case 1:
1668                 ret = 0;
1669                 break;
1670         default:
1671                 WARN_ON(1);
1672         }
1673         return ret;
1674 }
1675 
1676 static void zram_reset_device(struct zram *zram)
1677 {
1678         struct zcomp *comp;
1679         u64 disksize;
1680 
1681         down_write(&zram->init_lock);
1682 
1683         zram->limit_pages = 0;
1684 
1685         if (!init_done(zram)) {
1686                 up_write(&zram->init_lock);
1687                 return;
1688         }
1689 
1690         comp = zram->comp;
1691         disksize = zram->disksize;
1692         zram->disksize = 0;
1693 
1694         set_capacity(zram->disk, 0);
1695         part_stat_set_all(&zram->disk->part0, 0);
1696 
1697         up_write(&zram->init_lock);
1698         /* All pending I/O on all CPUs has completed, so it is safe to free */
1699         zram_meta_free(zram, disksize);
1700         memset(&zram->stats, 0, sizeof(zram->stats));
1701         zcomp_destroy(comp);
1702         reset_bdev(zram);
1703 }
1704 
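     /*
      * Illustrative usage from userspace (not part of this file):
      *   echo lzo > /sys/block/zram0/comp_algorithm   # optional; set before disksize
      *   echo 1G  > /sys/block/zram0/disksize
      * memparse() accepts the usual K/M/G size suffixes and the value is
      * rounded up to a multiple of PAGE_SIZE.  Changing the size of an
      * already initialized device requires a reset first.
      */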
1705 static ssize_t disksize_store(struct device *dev,
1706                 struct device_attribute *attr, const char *buf, size_t len)
1707 {
1708         u64 disksize;
1709         struct zcomp *comp;
1710         struct zram *zram = dev_to_zram(dev);
1711         int err;
1712 
1713         disksize = memparse(buf, NULL);
1714         if (!disksize)
1715                 return -EINVAL;
1716 
1717         down_write(&zram->init_lock);
1718         if (init_done(zram)) {
1719                 pr_info("Cannot change disksize for initialized device\n");
1720                 err = -EBUSY;
1721                 goto out_unlock;
1722         }
1723 
1724         disksize = PAGE_ALIGN(disksize);
1725         if (!zram_meta_alloc(zram, disksize)) {
1726                 err = -ENOMEM;
1727                 goto out_unlock;
1728         }
1729 
1730         comp = zcomp_create(zram->compressor);
1731         if (IS_ERR(comp)) {
1732                 pr_err("Cannot initialise %s compressing backend\n",
1733                                 zram->compressor);
1734                 err = PTR_ERR(comp);
1735                 goto out_free_meta;
1736         }
1737 
1738         zram->comp = comp;
1739         zram->disksize = disksize;
1740         set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
1741 
1742         revalidate_disk(zram->disk);
1743         up_write(&zram->init_lock);
1744 
1745         return len;
1746 
1747 out_free_meta:
1748         zram_meta_free(zram, disksize);
1749 out_unlock:
1750         up_write(&zram->init_lock);
1751         return err;
1752 }
1753 
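     /*
      * Illustrative usage: the device must not be in use, e.g.
      *   swapoff /dev/zram0        # or umount, if used as a block device
      *   echo 1 > /sys/block/zram0/reset
      * Writing to reset fails with -EBUSY while the device is still open.
      */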
1754 static ssize_t reset_store(struct device *dev,
1755                 struct device_attribute *attr, const char *buf, size_t len)
1756 {
1757         int ret;
1758         unsigned short do_reset;
1759         struct zram *zram;
1760         struct block_device *bdev;
1761 
1762         ret = kstrtou16(buf, 10, &do_reset);
1763         if (ret)
1764                 return ret;
1765 
1766         if (!do_reset)
1767                 return -EINVAL;
1768 
1769         zram = dev_to_zram(dev);
1770         bdev = bdget_disk(zram->disk, 0);
1771         if (!bdev)
1772                 return -ENOMEM;
1773 
1774         mutex_lock(&bdev->bd_mutex);
1775         /* Do not reset an active device or claimed device */
1776         if (bdev->bd_openers || zram->claim) {
1777                 mutex_unlock(&bdev->bd_mutex);
1778                 bdput(bdev);
1779                 return -EBUSY;
1780         }
1781 
1782         /* From now on, no one can open /dev/zram[0-9] */
1783         zram->claim = true;
1784         mutex_unlock(&bdev->bd_mutex);
1785 
1786         /* Make sure all pending I/O is finished */
1787         fsync_bdev(bdev);
1788         zram_reset_device(zram);
1789         revalidate_disk(zram->disk);
1790         bdput(bdev);
1791 
1792         mutex_lock(&bdev->bd_mutex);
1793         zram->claim = false;
1794         mutex_unlock(&bdev->bd_mutex);
1795 
1796         return len;
1797 }
1798 
1799 static int zram_open(struct block_device *bdev, fmode_t mode)
1800 {
1801         int ret = 0;
1802         struct zram *zram;
1803 
1804         WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
1805 
1806         zram = bdev->bd_disk->private_data;
1807         /* zram has been claimed for a reset, so the open request fails */
1808         if (zram->claim)
1809                 ret = -EBUSY;
1810 
1811         return ret;
1812 }
1813 
1814 static const struct block_device_operations zram_devops = {
1815         .open = zram_open,
1816         .swap_slot_free_notify = zram_slot_free_notify,
1817         .rw_page = zram_rw_page,
1818         .owner = THIS_MODULE
1819 };
1820 
1821 static DEVICE_ATTR_WO(compact);
1822 static DEVICE_ATTR_RW(disksize);
1823 static DEVICE_ATTR_RO(initstate);
1824 static DEVICE_ATTR_WO(reset);
1825 static DEVICE_ATTR_WO(mem_limit);
1826 static DEVICE_ATTR_WO(mem_used_max);
1827 static DEVICE_ATTR_WO(idle);
1828 static DEVICE_ATTR_RW(max_comp_streams);
1829 static DEVICE_ATTR_RW(comp_algorithm);
1830 #ifdef CONFIG_ZRAM_WRITEBACK
1831 static DEVICE_ATTR_RW(backing_dev);
1832 static DEVICE_ATTR_WO(writeback);
1833 static DEVICE_ATTR_RW(writeback_limit);
1834 static DEVICE_ATTR_RW(writeback_limit_enable);
1835 #endif
1836 
1837 static struct attribute *zram_disk_attrs[] = {
1838         &dev_attr_disksize.attr,
1839         &dev_attr_initstate.attr,
1840         &dev_attr_reset.attr,
1841         &dev_attr_compact.attr,
1842         &dev_attr_mem_limit.attr,
1843         &dev_attr_mem_used_max.attr,
1844         &dev_attr_idle.attr,
1845         &dev_attr_max_comp_streams.attr,
1846         &dev_attr_comp_algorithm.attr,
1847 #ifdef CONFIG_ZRAM_WRITEBACK
1848         &dev_attr_backing_dev.attr,
1849         &dev_attr_writeback.attr,
1850         &dev_attr_writeback_limit.attr,
1851         &dev_attr_writeback_limit_enable.attr,
1852 #endif
1853         &dev_attr_io_stat.attr,
1854         &dev_attr_mm_stat.attr,
1855 #ifdef CONFIG_ZRAM_WRITEBACK
1856         &dev_attr_bd_stat.attr,
1857 #endif
1858         &dev_attr_debug_stat.attr,
1859         NULL,
1860 };
1861 
1862 static const struct attribute_group zram_disk_attr_group = {
1863         .attrs = zram_disk_attrs,
1864 };
1865 
1866 static const struct attribute_group *zram_disk_attr_groups[] = {
1867         &zram_disk_attr_group,
1868         NULL,
1869 };
1870 
1871 /*
1872  * Allocate and initialize a new zram device.  The function returns
1873  * a device_id ('>= 0') upon success, and a negative value otherwise.
1874  */
1875 static int zram_add(void)
1876 {
1877         struct zram *zram;
1878         struct request_queue *queue;
1879         int ret, device_id;
1880 
1881         zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1882         if (!zram)
1883                 return -ENOMEM;
1884 
1885         ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
1886         if (ret < 0)
1887                 goto out_free_dev;
1888         device_id = ret;
1889 
1890         init_rwsem(&zram->init_lock);
1891 #ifdef CONFIG_ZRAM_WRITEBACK
1892         spin_lock_init(&zram->wb_limit_lock);
1893 #endif
1894         queue = blk_alloc_queue(GFP_KERNEL);
1895         if (!queue) {
1896                 pr_err("Error allocating disk queue for device %d\n",
1897                         device_id);
1898                 ret = -ENOMEM;
1899                 goto out_free_idr;
1900         }
1901 
1902         blk_queue_make_request(queue, zram_make_request);
1903 
1904         /* gendisk structure */
1905         zram->disk = alloc_disk(1);
1906         if (!zram->disk) {
1907                 pr_err("Error allocating disk structure for device %d\n",
1908                         device_id);
1909                 ret = -ENOMEM;
1910                 goto out_free_queue;
1911         }
1912 
1913         zram->disk->major = zram_major;
1914         zram->disk->first_minor = device_id;
1915         zram->disk->fops = &zram_devops;
1916         zram->disk->queue = queue;
1917         zram->disk->queue->queuedata = zram;
1918         zram->disk->private_data = zram;
1919         snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1920 
1921         /* Actual capacity is set using sysfs (/sys/block/zram<id>/disksize) */
1922         set_capacity(zram->disk, 0);
1923         /* zram devices sort of resemble non-rotational disks */
1924         blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
1925         blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1926 
1927         /*
1928          * Ensure that we always get PAGE_SIZE-aligned
1929          * and n*PAGE_SIZE-sized I/O requests.
1930          */
1931         blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1932         blk_queue_logical_block_size(zram->disk->queue,
1933                                         ZRAM_LOGICAL_BLOCK_SIZE);
1934         blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1935         blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
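              /*
               * zram frees compressed storage in whole-page units, so
               * advertise a PAGE_SIZE discard granularity and accept
               * arbitrarily large discard requests.
               */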
1936         zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1937         blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
1938         blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
1939 
1940         /*
1941          * zram_bio_discard() will clear all logical blocks if the logical
1942          * block size is identical to the physical block size (PAGE_SIZE).
1943          * If they differ, we skip discarding the parts of logical blocks
1944          * that fall in request ranges which are not aligned to the
1945          * physical block size, so we cannot guarantee that all discarded
1946          * logical blocks are zeroed.
1947          */
1948         if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1949                 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
1950 
1951         zram->disk->queue->backing_dev_info->capabilities |=
1952                         (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
1953         device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
1954 
1955         strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1956 
1957         zram_debugfs_register(zram);
1958         pr_info("Added device: %s\n", zram->disk->disk_name);
1959         return device_id;
1960 
1961 out_free_queue:
1962         blk_cleanup_queue(queue);
1963 out_free_idr:
1964         idr_remove(&zram_index_idr, device_id);
1965 out_free_dev:
1966         kfree(zram);
1967         return ret;
1968 }
1969 
1970 static int zram_remove(struct zram *zram)
1971 {
1972         struct block_device *bdev;
1973 
1974         bdev = bdget_disk(zram->disk, 0);
1975         if (!bdev)
1976                 return -ENOMEM;
1977 
1978         mutex_lock(&bdev->bd_mutex);
1979         if (bdev->bd_openers || zram->claim) {
1980                 mutex_unlock(&bdev->bd_mutex);
1981                 bdput(bdev);
1982                 return -EBUSY;
1983         }
1984 
1985         zram->claim = true;
1986         mutex_unlock(&bdev->bd_mutex);
1987 
1988         zram_debugfs_unregister(zram);
1989 
1990         /* Make sure all pending I/O is finished */
1991         fsync_bdev(bdev);
1992         zram_reset_device(zram);
1993         bdput(bdev);
1994 
1995         pr_info("Removed device: %s\n", zram->disk->disk_name);
1996 
1997         del_gendisk(zram->disk);
1998         blk_cleanup_queue(zram->disk->queue);
1999         put_disk(zram->disk);
2000         kfree(zram);
2001         return 0;
2002 }
2003 
2004 /* zram-control sysfs attributes */
2005 
2006 /*
2007  * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in
2008  * the sense that reading from this file does alter the state of your
2009  * system -- it creates a new uninitialized zram device and returns that
2010  * device's device_id (or an error code if it fails to create a new device).
2011  */
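     /*
      * Illustrative usage:
      *   cat /sys/class/zram-control/hot_add
      * prints the id of the newly created device (e.g. "1"), which then
      * shows up as /sys/block/zram1 (and /dev/zram1 once udev creates it).
      */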
2012 static ssize_t hot_add_show(struct class *class,
2013                         struct class_attribute *attr,
2014                         char *buf)
2015 {
2016         int ret;
2017 
2018         mutex_lock(&zram_index_mutex);
2019         ret = zram_add();
2020         mutex_unlock(&zram_index_mutex);
2021 
2022         if (ret < 0)
2023                 return ret;
2024         return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
2025 }
2026 static CLASS_ATTR_RO(hot_add);
2027 
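     /*
      * Illustrative usage:
      *   echo 1 > /sys/class/zram-control/hot_remove
      * removes zram1, failing with -EBUSY if the device is still in use
      * and with -ENODEV if no such device exists.
      */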
2028 static ssize_t hot_remove_store(struct class *class,
2029                         struct class_attribute *attr,
2030                         const char *buf,
2031                         size_t count)
2032 {
2033         struct zram *zram;
2034         int ret, dev_id;
2035 
2036         /* dev_id is gendisk->first_minor, which is `int' */
2037         ret = kstrtoint(buf, 10, &dev_id);
2038         if (ret)
2039                 return ret;
2040         if (dev_id < 0)
2041                 return -EINVAL;
2042 
2043         mutex_lock(&zram_index_mutex);
2044 
2045         zram = idr_find(&zram_index_idr, dev_id);
2046         if (zram) {
2047                 ret = zram_remove(zram);
2048                 if (!ret)
2049                         idr_remove(&zram_index_idr, dev_id);
2050         } else {
2051                 ret = -ENODEV;
2052         }
2053 
2054         mutex_unlock(&zram_index_mutex);
2055         return ret ? ret : count;
2056 }
2057 static CLASS_ATTR_WO(hot_remove);
2058 
2059 static struct attribute *zram_control_class_attrs[] = {
2060         &class_attr_hot_add.attr,
2061         &class_attr_hot_remove.attr,
2062         NULL,
2063 };
2064 ATTRIBUTE_GROUPS(zram_control_class);
2065 
2066 static struct class zram_control_class = {
2067         .name           = "zram-control",
2068         .owner          = THIS_MODULE,
2069         .class_groups   = zram_control_class_groups,
2070 };
2071 
2072 static int zram_remove_cb(int id, void *ptr, void *data)
2073 {
2074         zram_remove(ptr);
2075         return 0;
2076 }
2077 
2078 static void destroy_devices(void)
2079 {
2080         class_unregister(&zram_control_class);
2081         idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
2082         zram_debugfs_destroy();
2083         idr_destroy(&zram_index_idr);
2084         unregister_blkdev(zram_major, "zram");
2085         cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2086 }
2087 
2088 static int __init zram_init(void)
2089 {
2090         int ret;
2091 
2092         ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
2093                                       zcomp_cpu_up_prepare, zcomp_cpu_dead);
2094         if (ret < 0)
2095                 return ret;
2096 
2097         ret = class_register(&zram_control_class);
2098         if (ret) {
2099                 pr_err("Unable to register zram-control class\n");
2100                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2101                 return ret;
2102         }
2103 
2104         zram_debugfs_create();
2105         zram_major = register_blkdev(0, "zram");
2106         if (zram_major <= 0) {
2107                 pr_err("Unable to get major number\n");
2108                 class_unregister(&zram_control_class);
2109                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
2110                 return -EBUSY;
2111         }
2112 
2113         while (num_devices != 0) {
2114                 mutex_lock(&zram_index_mutex);
2115                 ret = zram_add();
2116                 mutex_unlock(&zram_index_mutex);
2117                 if (ret < 0)
2118                         goto out_error;
2119                 num_devices--;
2120         }
2121 
2122         return 0;
2123 
2124 out_error:
2125         destroy_devices();
2126         return ret;
2127 }
2128 
2129 static void __exit zram_exit(void)
2130 {
2131         destroy_devices();
2132 }
2133 
2134 module_init(zram_init);
2135 module_exit(zram_exit);
2136 
2137 module_param(num_devices, uint, 0);
2138 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
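     /*
      * Illustrative usage: pre-create four devices at module load time, e.g.
      *   modprobe zram num_devices=4
      * Additional devices can still be added later via the zram-control class.
      */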
2139 
2140 MODULE_LICENSE("Dual BSD/GPL");
2141 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
2142 MODULE_DESCRIPTION("Compressed RAM Block Device");

/* [<][>][^][v][top][bottom][index][help] */