root/drivers/md/dm-clone-metadata.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. sb_prepare_for_write
  2. sb_check
  3. __superblock_all_zeroes
  4. superblock_read_lock
  5. superblock_write_lock
  6. superblock_write_lock_zero
  7. __copy_sm_root
  8. __prepare_superblock
  9. __open_metadata
  10. __format_metadata
  11. __open_or_format_metadata
  12. __create_persistent_data_structures
  13. __destroy_persistent_data_structures
  14. bitmap_size
  15. __dirty_map_init
  16. __dirty_map_exit
  17. dirty_map_init
  18. dirty_map_exit
  19. __load_bitset_in_core
  20. dm_clone_metadata_open
  21. dm_clone_metadata_close
  22. dm_clone_is_hydration_done
  23. dm_clone_is_region_hydrated
  24. dm_clone_is_range_hydrated
  25. dm_clone_nr_of_hydrated_regions
  26. dm_clone_find_next_unhydrated_region
  27. __update_metadata_word
  28. __metadata_commit
  29. __flush_dmap
  30. dm_clone_metadata_pre_commit
  31. dm_clone_metadata_commit
  32. dm_clone_set_region_hydrated
  33. dm_clone_cond_set_range
  34. dm_clone_reload_in_core_bitset
  35. dm_clone_changed_this_transaction
  36. dm_clone_metadata_abort
  37. dm_clone_metadata_set_read_only
  38. dm_clone_metadata_set_read_write
  39. dm_clone_get_free_metadata_block_count
  40. dm_clone_get_metadata_dev_size

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
   4  */
   5 
   6 #include <linux/mm.h>
   7 #include <linux/err.h>
   8 #include <linux/slab.h>
   9 #include <linux/rwsem.h>
  10 #include <linux/bitops.h>
  11 #include <linux/bitmap.h>
  12 #include <linux/device-mapper.h>
  13 
  14 #include "persistent-data/dm-bitset.h"
  15 #include "persistent-data/dm-space-map.h"
  16 #include "persistent-data/dm-block-manager.h"
  17 #include "persistent-data/dm-transaction-manager.h"
  18 
  19 #include "dm-clone-metadata.h"
  20 
#define DM_MSG_PREFIX "clone metadata"

/* Block number handed to the block manager whenever the superblock is locked. */
#define SUPERBLOCK_LOCATION 0
/* Value the superblock's magic field must hold; verified by sb_check(). */
#define SUPERBLOCK_MAGIC 0x8af27f64
/* Passed as the last argument of dm_bm_checksum() for the superblock. */
#define SUPERBLOCK_CSUM_XOR 257649492

/* Passed as the last argument of dm_block_manager_create(). */
#define DM_CLONE_MAX_CONCURRENT_LOCKS 5

/* Size, in bytes, of the superblock's uuid field. */
#define UUID_LEN 16

/* Min and max dm-clone metadata versions supported */
#define DM_CLONE_MIN_METADATA_VERSION 1
#define DM_CLONE_MAX_METADATA_VERSION 1
  34 
/*
 * On-disk metadata layout
 *
 * All multi-byte fields are stored little-endian. The structure is packed, so
 * the field order below is the on-disk byte order and must not be changed.
 */
struct superblock_disk {
        /* Checksum of the block, computed from the flags field onwards. */
        __le32 csum;
        /* Written as 0 by __prepare_superblock(). */
        __le32 flags;
        /* Location of this block; filled in by sb_prepare_for_write(). */
        __le64 blocknr;

        /* Currently unused; zeroed by __prepare_superblock(). */
        __u8 uuid[UUID_LEN];
        /* Must equal SUPERBLOCK_MAGIC. */
        __le64 magic;
        /* Metadata format version; checked against the supported range. */
        __le32 version;

        /* Copy of the metadata space map root. */
        __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

        /* Must match the values the target was opened with. */
        __le64 region_size;
        __le64 target_size;

        /* Root block of the on-disk region bitset. */
        __le64 bitset_root;
} __packed;
  54 
  55 /*
  56  * Region and Dirty bitmaps.
  57  *
  58  * dm-clone logically splits the source and destination devices in regions of
  59  * fixed size. The destination device's regions are gradually hydrated, i.e.,
  60  * we copy (clone) the source's regions to the destination device. Eventually,
  61  * all regions will get hydrated and all I/O will be served from the
  62  * destination device.
  63  *
  64  * We maintain an on-disk bitmap which tracks the state of each of the
  65  * destination device's regions, i.e., whether they are hydrated or not.
  66  *
  67  * To save constantly doing look ups on disk we keep an in core copy of the
  68  * on-disk bitmap, the region_map.
  69  *
  70  * In order to track which regions are hydrated during a metadata transaction,
  71  * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
  72  * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
  73  * tracks the regions that got hydrated during the current metadata
  74  * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
  75  * the dirty_regions bitmap.
  76  *
  77  * This allows us to precisely track the regions that were hydrated during the
  78  * current metadata transaction and update the metadata accordingly, when we
  79  * commit the current transaction. This is important because dm-clone should
  80  * only commit the metadata of regions that were properly flushed to the
  81  * destination device beforehand. Otherwise, in case of a crash, we could end
  82  * up with a corrupted dm-clone device.
  83  *
  84  * When a region finishes hydrating dm-clone calls
  85  * dm_clone_set_region_hydrated(), or for discard requests
  86  * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
  87  * and dmap.
  88  *
  89  * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
  90  * and update the on-disk metadata accordingly. Thus, we don't have to flush to
  91  * disk the whole region_map. We can just flush the dirty region_map bits.
  92  *
  93  * We use the helper dmap->dirty_words bitmap, which is smaller than the
  94  * original region_map, to reduce the amount of memory accesses during a
  95  * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
  96  * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
  97  * accesses.
  98  *
  99  * We could update directly the on-disk bitmap, when dm-clone calls either
  100  * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this
 101  * inserts significant metadata I/O overhead in dm-clone's I/O path. Also, as
 102  * these two functions don't block, we can call them in interrupt context,
 103  * e.g., in a hooked overwrite bio's completion routine, and further reduce the
 104  * I/O completion latency.
 105  *
 106  * We maintain two dirty bitmap sets. During a metadata commit we atomically
 107  * swap the currently used dmap with the unused one. This allows the metadata
 108  * update functions to run concurrently with an ongoing commit.
 109  */
/*
 * One set of dirty bitmaps, tracking the regions hydrated during a single
 * metadata transaction.
 */
struct dirty_map {
        /* One bit per long of dirty_regions; set when that long has dirty bits. */
        unsigned long *dirty_words;
        /* One bit per region; set when the region was hydrated in this transaction. */
        unsigned long *dirty_regions;
        /* Set to 1 when a region is marked; reset to 0 after a successful commit. */
        unsigned int changed;
};
 115 
/*
 * In-core state of an open dm-clone metadata device.
 */
struct dm_clone_metadata {
        /* The metadata block device */
        struct block_device *bdev;

        /* Sizes supplied by the caller of dm_clone_metadata_open(). */
        sector_t target_size;
        sector_t region_size;
        /* target_size divided by region_size, rounded up. */
        unsigned long nr_regions;
        /* Number of longs needed to hold nr_regions bits. */
        unsigned long nr_words;

        /* Spinlock protecting the region and dirty bitmaps. */
        spinlock_t bitmap_lock;
        struct dirty_map dmap[2];
        /* The dmap that new hydrations are recorded in; swapped at pre-commit. */
        struct dirty_map *current_dmap;

        /* Protected by lock (the rw_semaphore below) */
        struct dirty_map *committing_dmap;

        /*
         * In core copy of the on-disk bitmap to save constantly doing look ups
         * on disk.
         */
        unsigned long *region_map;

        /* Protected by bitmap_lock */
        unsigned int read_only;

        struct dm_block_manager *bm;
        struct dm_space_map *sm;
        struct dm_transaction_manager *tm;

        /* Held for writing across the pre-commit and commit operations. */
        struct rw_semaphore lock;

        struct dm_disk_bitset bitset_info;
        dm_block_t bitset_root;

        /*
         * Reading the space map root can fail, so we read it into this
         * buffer before the superblock is locked and updated.
         */
        __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

        /* Set once every bit of region_map is set. */
        bool hydration_done:1;
        /* When set, commits are refused and close skips persistent-data teardown. */
        bool fail_io:1;
};
 160 
 161 /*---------------------------------------------------------------------------*/
 162 
 163 /*
 164  * Superblock validation.
 165  */
 166 static void sb_prepare_for_write(struct dm_block_validator *v,
 167                                  struct dm_block *b, size_t sb_block_size)
 168 {
 169         struct superblock_disk *sb;
 170         u32 csum;
 171 
 172         sb = dm_block_data(b);
 173         sb->blocknr = cpu_to_le64(dm_block_location(b));
 174 
 175         csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
 176                               SUPERBLOCK_CSUM_XOR);
 177         sb->csum = cpu_to_le32(csum);
 178 }
 179 
 180 static int sb_check(struct dm_block_validator *v, struct dm_block *b,
 181                     size_t sb_block_size)
 182 {
 183         struct superblock_disk *sb;
 184         u32 csum, metadata_version;
 185 
 186         sb = dm_block_data(b);
 187 
 188         if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) {
 189                 DMERR("Superblock check failed: blocknr %llu, expected %llu",
 190                       le64_to_cpu(sb->blocknr),
 191                       (unsigned long long)dm_block_location(b));
 192                 return -ENOTBLK;
 193         }
 194 
 195         if (le64_to_cpu(sb->magic) != SUPERBLOCK_MAGIC) {
 196                 DMERR("Superblock check failed: magic %llu, expected %llu",
 197                       le64_to_cpu(sb->magic),
 198                       (unsigned long long)SUPERBLOCK_MAGIC);
 199                 return -EILSEQ;
 200         }
 201 
 202         csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
 203                               SUPERBLOCK_CSUM_XOR);
 204         if (sb->csum != cpu_to_le32(csum)) {
 205                 DMERR("Superblock check failed: checksum %u, expected %u",
 206                       csum, le32_to_cpu(sb->csum));
 207                 return -EILSEQ;
 208         }
 209 
 210         /* Check metadata version */
 211         metadata_version = le32_to_cpu(sb->version);
 212         if (metadata_version < DM_CLONE_MIN_METADATA_VERSION ||
 213             metadata_version > DM_CLONE_MAX_METADATA_VERSION) {
 214                 DMERR("Clone metadata version %u found, but only versions between %u and %u supported.",
 215                       metadata_version, DM_CLONE_MIN_METADATA_VERSION,
 216                       DM_CLONE_MAX_METADATA_VERSION);
 217                 return -EINVAL;
 218         }
 219 
 220         return 0;
 221 }
 222 
/*
 * Validator handed to the block manager by the superblock lock helpers; it
 * wires up the two callbacks above.
 */
static struct dm_block_validator sb_validator = {
        .name = "superblock",
        .prepare_for_write = sb_prepare_for_write,
        .check = sb_check
};
 228 
 229 /*
 230  * Check if the superblock is formatted or not. We consider the superblock to
 231  * be formatted in case we find non-zero bytes in it.
 232  */
 233 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted)
 234 {
 235         int r;
 236         unsigned int i, nr_words;
 237         struct dm_block *sblock;
 238         __le64 *data_le, zero = cpu_to_le64(0);
 239 
 240         /*
 241          * We don't use a validator here because the superblock could be all
 242          * zeroes.
 243          */
 244         r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock);
 245         if (r) {
 246                 DMERR("Failed to read_lock superblock");
 247                 return r;
 248         }
 249 
 250         data_le = dm_block_data(sblock);
 251         *formatted = false;
 252 
 253         /* This assumes that the block size is a multiple of 8 bytes */
 254         BUG_ON(dm_bm_block_size(bm) % sizeof(__le64));
 255         nr_words = dm_bm_block_size(bm) / sizeof(__le64);
 256         for (i = 0; i < nr_words; i++) {
 257                 if (data_le[i] != zero) {
 258                         *formatted = true;
 259                         break;
 260                 }
 261         }
 262 
 263         dm_bm_unlock(sblock);
 264 
 265         return 0;
 266 }
 267 
 268 /*---------------------------------------------------------------------------*/
 269 
 270 /*
 271  * Low-level metadata handling.
 272  */
 273 static inline int superblock_read_lock(struct dm_clone_metadata *cmd,
 274                                        struct dm_block **sblock)
 275 {
 276         return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
 277 }
 278 
 279 static inline int superblock_write_lock(struct dm_clone_metadata *cmd,
 280                                         struct dm_block **sblock)
 281 {
 282         return dm_bm_write_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
 283 }
 284 
 285 static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd,
 286                                              struct dm_block **sblock)
 287 {
 288         return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
 289 }
 290 
 291 static int __copy_sm_root(struct dm_clone_metadata *cmd)
 292 {
 293         int r;
 294         size_t root_size;
 295 
 296         r = dm_sm_root_size(cmd->sm, &root_size);
 297         if (r)
 298                 return r;
 299 
 300         return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size);
 301 }
 302 
 303 /* Save dm-clone metadata in superblock */
 304 static void __prepare_superblock(struct dm_clone_metadata *cmd,
 305                                  struct superblock_disk *sb)
 306 {
 307         sb->flags = cpu_to_le32(0UL);
 308 
 309         /* FIXME: UUID is currently unused */
 310         memset(sb->uuid, 0, sizeof(sb->uuid));
 311 
 312         sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
 313         sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION);
 314 
 315         /* Save the metadata space_map root */
 316         memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root,
 317                sizeof(cmd->metadata_space_map_root));
 318 
 319         sb->region_size = cpu_to_le64(cmd->region_size);
 320         sb->target_size = cpu_to_le64(cmd->target_size);
 321         sb->bitset_root = cpu_to_le64(cmd->bitset_root);
 322 }
 323 
 324 static int __open_metadata(struct dm_clone_metadata *cmd)
 325 {
 326         int r;
 327         struct dm_block *sblock;
 328         struct superblock_disk *sb;
 329 
 330         r = superblock_read_lock(cmd, &sblock);
 331 
 332         if (r) {
 333                 DMERR("Failed to read_lock superblock");
 334                 return r;
 335         }
 336 
 337         sb = dm_block_data(sblock);
 338 
 339         /* Verify that target_size and region_size haven't changed. */
 340         if (cmd->region_size != le64_to_cpu(sb->region_size) ||
 341             cmd->target_size != le64_to_cpu(sb->target_size)) {
 342                 DMERR("Region and/or target size don't match the ones in metadata");
 343                 r = -EINVAL;
 344                 goto out_with_lock;
 345         }
 346 
 347         r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION,
 348                                sb->metadata_space_map_root,
 349                                sizeof(sb->metadata_space_map_root),
 350                                &cmd->tm, &cmd->sm);
 351 
 352         if (r) {
 353                 DMERR("dm_tm_open_with_sm failed");
 354                 goto out_with_lock;
 355         }
 356 
 357         dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
 358         cmd->bitset_root = le64_to_cpu(sb->bitset_root);
 359 
 360 out_with_lock:
 361         dm_bm_unlock(sblock);
 362 
 363         return r;
 364 }
 365 
 366 static int __format_metadata(struct dm_clone_metadata *cmd)
 367 {
 368         int r;
 369         struct dm_block *sblock;
 370         struct superblock_disk *sb;
 371 
 372         r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm);
 373         if (r) {
 374                 DMERR("Failed to create transaction manager");
 375                 return r;
 376         }
 377 
 378         dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
 379 
 380         r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root);
 381         if (r) {
 382                 DMERR("Failed to create empty on-disk bitset");
 383                 goto err_with_tm;
 384         }
 385 
 386         r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0,
 387                              cmd->nr_regions, false, &cmd->bitset_root);
 388         if (r) {
 389                 DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions);
 390                 goto err_with_tm;
 391         }
 392 
 393         /* Flush to disk all blocks, except the superblock */
 394         r = dm_tm_pre_commit(cmd->tm);
 395         if (r) {
 396                 DMERR("dm_tm_pre_commit failed");
 397                 goto err_with_tm;
 398         }
 399 
 400         r = __copy_sm_root(cmd);
 401         if (r) {
 402                 DMERR("__copy_sm_root failed");
 403                 goto err_with_tm;
 404         }
 405 
 406         r = superblock_write_lock_zero(cmd, &sblock);
 407         if (r) {
 408                 DMERR("Failed to write_lock superblock");
 409                 goto err_with_tm;
 410         }
 411 
 412         sb = dm_block_data(sblock);
 413         __prepare_superblock(cmd, sb);
 414         r = dm_tm_commit(cmd->tm, sblock);
 415         if (r) {
 416                 DMERR("Failed to commit superblock");
 417                 goto err_with_tm;
 418         }
 419 
 420         return 0;
 421 
 422 err_with_tm:
 423         dm_sm_destroy(cmd->sm);
 424         dm_tm_destroy(cmd->tm);
 425 
 426         return r;
 427 }
 428 
 429 static int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device)
 430 {
 431         int r;
 432         bool formatted = false;
 433 
 434         r = __superblock_all_zeroes(cmd->bm, &formatted);
 435         if (r)
 436                 return r;
 437 
 438         if (!formatted)
 439                 return may_format_device ? __format_metadata(cmd) : -EPERM;
 440 
 441         return __open_metadata(cmd);
 442 }
 443 
 444 static int __create_persistent_data_structures(struct dm_clone_metadata *cmd,
 445                                                bool may_format_device)
 446 {
 447         int r;
 448 
 449         /* Create block manager */
 450         cmd->bm = dm_block_manager_create(cmd->bdev,
 451                                          DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
 452                                          DM_CLONE_MAX_CONCURRENT_LOCKS);
 453         if (IS_ERR(cmd->bm)) {
 454                 DMERR("Failed to create block manager");
 455                 return PTR_ERR(cmd->bm);
 456         }
 457 
 458         r = __open_or_format_metadata(cmd, may_format_device);
 459         if (r)
 460                 dm_block_manager_destroy(cmd->bm);
 461 
 462         return r;
 463 }
 464 
 465 static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd)
 466 {
 467         dm_sm_destroy(cmd->sm);
 468         dm_tm_destroy(cmd->tm);
 469         dm_block_manager_destroy(cmd->bm);
 470 }
 471 
 472 /*---------------------------------------------------------------------------*/
 473 
 474 static size_t bitmap_size(unsigned long nr_bits)
 475 {
 476         return BITS_TO_LONGS(nr_bits) * sizeof(long);
 477 }
 478 
 479 static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words,
 480                             unsigned long nr_regions)
 481 {
 482         dmap->changed = 0;
 483 
 484         dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL);
 485         if (!dmap->dirty_words)
 486                 return -ENOMEM;
 487 
 488         dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL);
 489         if (!dmap->dirty_regions) {
 490                 kvfree(dmap->dirty_words);
 491                 return -ENOMEM;
 492         }
 493 
 494         return 0;
 495 }
 496 
 497 static void __dirty_map_exit(struct dirty_map *dmap)
 498 {
 499         kvfree(dmap->dirty_words);
 500         kvfree(dmap->dirty_regions);
 501 }
 502 
 503 static int dirty_map_init(struct dm_clone_metadata *cmd)
 504 {
 505         if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) {
 506                 DMERR("Failed to allocate dirty bitmap");
 507                 return -ENOMEM;
 508         }
 509 
 510         if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) {
 511                 DMERR("Failed to allocate dirty bitmap");
 512                 __dirty_map_exit(&cmd->dmap[0]);
 513                 return -ENOMEM;
 514         }
 515 
 516         cmd->current_dmap = &cmd->dmap[0];
 517         cmd->committing_dmap = NULL;
 518 
 519         return 0;
 520 }
 521 
 522 static void dirty_map_exit(struct dm_clone_metadata *cmd)
 523 {
 524         __dirty_map_exit(&cmd->dmap[0]);
 525         __dirty_map_exit(&cmd->dmap[1]);
 526 }
 527 
 528 static int __load_bitset_in_core(struct dm_clone_metadata *cmd)
 529 {
 530         int r;
 531         unsigned long i;
 532         struct dm_bitset_cursor c;
 533 
 534         /* Flush bitset cache */
 535         r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
 536         if (r)
 537                 return r;
 538 
 539         r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c);
 540         if (r)
 541                 return r;
 542 
 543         for (i = 0; ; i++) {
 544                 if (dm_bitset_cursor_get_value(&c))
 545                         __set_bit(i, cmd->region_map);
 546                 else
 547                         __clear_bit(i, cmd->region_map);
 548 
 549                 if (i >= (cmd->nr_regions - 1))
 550                         break;
 551 
 552                 r = dm_bitset_cursor_next(&c);
 553 
 554                 if (r)
 555                         break;
 556         }
 557 
 558         dm_bitset_cursor_end(&c);
 559 
 560         return r;
 561 }
 562 
 563 struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev,
 564                                                  sector_t target_size,
 565                                                  sector_t region_size)
 566 {
 567         int r;
 568         struct dm_clone_metadata *cmd;
 569 
 570         cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
 571         if (!cmd) {
 572                 DMERR("Failed to allocate memory for dm-clone metadata");
 573                 return ERR_PTR(-ENOMEM);
 574         }
 575 
 576         cmd->bdev = bdev;
 577         cmd->target_size = target_size;
 578         cmd->region_size = region_size;
 579         cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size);
 580         cmd->nr_words = BITS_TO_LONGS(cmd->nr_regions);
 581 
 582         init_rwsem(&cmd->lock);
 583         spin_lock_init(&cmd->bitmap_lock);
 584         cmd->read_only = 0;
 585         cmd->fail_io = false;
 586         cmd->hydration_done = false;
 587 
 588         cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL);
 589         if (!cmd->region_map) {
 590                 DMERR("Failed to allocate memory for region bitmap");
 591                 r = -ENOMEM;
 592                 goto out_with_md;
 593         }
 594 
 595         r = __create_persistent_data_structures(cmd, true);
 596         if (r)
 597                 goto out_with_region_map;
 598 
 599         r = __load_bitset_in_core(cmd);
 600         if (r) {
 601                 DMERR("Failed to load on-disk region map");
 602                 goto out_with_pds;
 603         }
 604 
 605         r = dirty_map_init(cmd);
 606         if (r)
 607                 goto out_with_pds;
 608 
 609         if (bitmap_full(cmd->region_map, cmd->nr_regions))
 610                 cmd->hydration_done = true;
 611 
 612         return cmd;
 613 
 614 out_with_pds:
 615         __destroy_persistent_data_structures(cmd);
 616 
 617 out_with_region_map:
 618         kvfree(cmd->region_map);
 619 
 620 out_with_md:
 621         kfree(cmd);
 622 
 623         return ERR_PTR(r);
 624 }
 625 
 626 void dm_clone_metadata_close(struct dm_clone_metadata *cmd)
 627 {
 628         if (!cmd->fail_io)
 629                 __destroy_persistent_data_structures(cmd);
 630 
 631         dirty_map_exit(cmd);
 632         kvfree(cmd->region_map);
 633         kfree(cmd);
 634 }
 635 
 636 bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd)
 637 {
 638         return cmd->hydration_done;
 639 }
 640 
 641 bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
 642 {
 643         return dm_clone_is_hydration_done(cmd) || test_bit(region_nr, cmd->region_map);
 644 }
 645 
 646 bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd,
 647                                 unsigned long start, unsigned long nr_regions)
 648 {
 649         unsigned long bit;
 650 
 651         if (dm_clone_is_hydration_done(cmd))
 652                 return true;
 653 
 654         bit = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
 655 
 656         return (bit >= (start + nr_regions));
 657 }
 658 
 659 unsigned int dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd)
 660 {
 661         return bitmap_weight(cmd->region_map, cmd->nr_regions);
 662 }
 663 
 664 unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd,
 665                                                    unsigned long start)
 666 {
 667         return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
 668 }
 669 
 670 static int __update_metadata_word(struct dm_clone_metadata *cmd,
 671                                   unsigned long *dirty_regions,
 672                                   unsigned long word)
 673 {
 674         int r;
 675         unsigned long index = word * BITS_PER_LONG;
 676         unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG);
 677 
 678         while (index < max_index) {
 679                 if (test_bit(index, dirty_regions)) {
 680                         r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root,
 681                                               index, &cmd->bitset_root);
 682                         if (r) {
 683                                 DMERR("dm_bitset_set_bit failed");
 684                                 return r;
 685                         }
 686                         __clear_bit(index, dirty_regions);
 687                 }
 688                 index++;
 689         }
 690 
 691         return 0;
 692 }
 693 
 694 static int __metadata_commit(struct dm_clone_metadata *cmd)
 695 {
 696         int r;
 697         struct dm_block *sblock;
 698         struct superblock_disk *sb;
 699 
 700         /* Flush bitset cache */
 701         r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
 702         if (r) {
 703                 DMERR("dm_bitset_flush failed");
 704                 return r;
 705         }
 706 
 707         /* Flush to disk all blocks, except the superblock */
 708         r = dm_tm_pre_commit(cmd->tm);
 709         if (r) {
 710                 DMERR("dm_tm_pre_commit failed");
 711                 return r;
 712         }
 713 
 714         /* Save the space map root in cmd->metadata_space_map_root */
 715         r = __copy_sm_root(cmd);
 716         if (r) {
 717                 DMERR("__copy_sm_root failed");
 718                 return r;
 719         }
 720 
 721         /* Lock the superblock */
 722         r = superblock_write_lock_zero(cmd, &sblock);
 723         if (r) {
 724                 DMERR("Failed to write_lock superblock");
 725                 return r;
 726         }
 727 
 728         /* Save the metadata in superblock */
 729         sb = dm_block_data(sblock);
 730         __prepare_superblock(cmd, sb);
 731 
 732         /* Unlock superblock and commit it to disk */
 733         r = dm_tm_commit(cmd->tm, sblock);
 734         if (r) {
 735                 DMERR("Failed to commit superblock");
 736                 return r;
 737         }
 738 
 739         /*
 740          * FIXME: Find a more efficient way to check if the hydration is done.
 741          */
 742         if (bitmap_full(cmd->region_map, cmd->nr_regions))
 743                 cmd->hydration_done = true;
 744 
 745         return 0;
 746 }
 747 
 748 static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
 749 {
 750         int r;
 751         unsigned long word;
 752 
 753         word = 0;
 754         do {
 755                 word = find_next_bit(dmap->dirty_words, cmd->nr_words, word);
 756 
 757                 if (word == cmd->nr_words)
 758                         break;
 759 
 760                 r = __update_metadata_word(cmd, dmap->dirty_regions, word);
 761 
 762                 if (r)
 763                         return r;
 764 
 765                 __clear_bit(word, dmap->dirty_words);
 766                 word++;
 767         } while (word < cmd->nr_words);
 768 
 769         r = __metadata_commit(cmd);
 770 
 771         if (r)
 772                 return r;
 773 
 774         /* Update the changed flag */
 775         spin_lock_irq(&cmd->bitmap_lock);
 776         dmap->changed = 0;
 777         spin_unlock_irq(&cmd->bitmap_lock);
 778 
 779         return 0;
 780 }
 781 
 782 int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd)
 783 {
 784         int r = 0;
 785         struct dirty_map *dmap, *next_dmap;
 786 
 787         down_write(&cmd->lock);
 788 
 789         if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
 790                 r = -EPERM;
 791                 goto out;
 792         }
 793 
 794         /* Get current dirty bitmap */
 795         dmap = cmd->current_dmap;
 796 
 797         /* Get next dirty bitmap */
 798         next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0];
 799 
 800         /*
 801          * The last commit failed, so we don't have a clean dirty-bitmap to
 802          * use.
 803          */
 804         if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) {
 805                 r = -EINVAL;
 806                 goto out;
 807         }
 808 
 809         /* Swap dirty bitmaps */
 810         spin_lock_irq(&cmd->bitmap_lock);
 811         cmd->current_dmap = next_dmap;
 812         spin_unlock_irq(&cmd->bitmap_lock);
 813 
 814         /* Set old dirty bitmap as currently committing */
 815         cmd->committing_dmap = dmap;
 816 out:
 817         up_write(&cmd->lock);
 818 
 819         return r;
 820 }
 821 
 822 int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
 823 {
 824         int r = -EPERM;
 825 
 826         down_write(&cmd->lock);
 827 
 828         if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
 829                 goto out;
 830 
 831         if (WARN_ON(!cmd->committing_dmap)) {
 832                 r = -EINVAL;
 833                 goto out;
 834         }
 835 
 836         r = __flush_dmap(cmd, cmd->committing_dmap);
 837         if (!r) {
 838                 /* Clear committing dmap */
 839                 cmd->committing_dmap = NULL;
 840         }
 841 out:
 842         up_write(&cmd->lock);
 843 
 844         return r;
 845 }
 846 
 847 int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
 848 {
 849         int r = 0;
 850         struct dirty_map *dmap;
 851         unsigned long word, flags;
 852 
 853         if (unlikely(region_nr >= cmd->nr_regions)) {
 854                 DMERR("Region %lu out of range (total number of regions %lu)",
 855                       region_nr, cmd->nr_regions);
 856                 return -ERANGE;
 857         }
 858 
 859         word = region_nr / BITS_PER_LONG;
 860 
 861         spin_lock_irqsave(&cmd->bitmap_lock, flags);
 862 
 863         if (cmd->read_only) {
 864                 r = -EPERM;
 865                 goto out;
 866         }
 867 
 868         dmap = cmd->current_dmap;
 869 
 870         __set_bit(word, dmap->dirty_words);
 871         __set_bit(region_nr, dmap->dirty_regions);
 872         __set_bit(region_nr, cmd->region_map);
 873         dmap->changed = 1;
 874 
 875 out:
 876         spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
 877 
 878         return r;
 879 }
 880 
 881 int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
 882                             unsigned long nr_regions)
 883 {
 884         int r = 0;
 885         struct dirty_map *dmap;
 886         unsigned long word, region_nr;
 887 
 888         if (unlikely(start >= cmd->nr_regions || (start + nr_regions) < start ||
 889                      (start + nr_regions) > cmd->nr_regions)) {
 890                 DMERR("Invalid region range: start %lu, nr_regions %lu (total number of regions %lu)",
 891                       start, nr_regions, cmd->nr_regions);
 892                 return -ERANGE;
 893         }
 894 
 895         spin_lock_irq(&cmd->bitmap_lock);
 896 
 897         if (cmd->read_only) {
 898                 r = -EPERM;
 899                 goto out;
 900         }
 901 
 902         dmap = cmd->current_dmap;
 903         for (region_nr = start; region_nr < (start + nr_regions); region_nr++) {
 904                 if (!test_bit(region_nr, cmd->region_map)) {
 905                         word = region_nr / BITS_PER_LONG;
 906                         __set_bit(word, dmap->dirty_words);
 907                         __set_bit(region_nr, dmap->dirty_regions);
 908                         __set_bit(region_nr, cmd->region_map);
 909                         dmap->changed = 1;
 910                 }
 911         }
 912 out:
 913         spin_unlock_irq(&cmd->bitmap_lock);
 914 
 915         return r;
 916 }
 917 
 918 /*
 919  * WARNING: This must not be called concurrently with either
 920  * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes
 921  * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only
 922  * exception is after setting the metadata to read-only mode, using
 923  * dm_clone_metadata_set_read_only().
 924  *
 925  * We don't take the spinlock because __load_bitset_in_core() does I/O, so it
 926  * may block.
 927  */
 928 int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd)
 929 {
 930         int r = -EINVAL;
 931 
 932         down_write(&cmd->lock);
 933 
 934         if (cmd->fail_io)
 935                 goto out;
 936 
 937         r = __load_bitset_in_core(cmd);
 938 out:
 939         up_write(&cmd->lock);
 940 
 941         return r;
 942 }
 943 
 944 bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd)
 945 {
 946         bool r;
 947         unsigned long flags;
 948 
 949         spin_lock_irqsave(&cmd->bitmap_lock, flags);
 950         r = cmd->dmap[0].changed || cmd->dmap[1].changed;
 951         spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
 952 
 953         return r;
 954 }
 955 
 956 int dm_clone_metadata_abort(struct dm_clone_metadata *cmd)
 957 {
 958         int r = -EPERM;
 959 
 960         down_write(&cmd->lock);
 961 
 962         if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
 963                 goto out;
 964 
 965         __destroy_persistent_data_structures(cmd);
 966 
 967         r = __create_persistent_data_structures(cmd, false);
 968         if (r) {
 969                 /* If something went wrong we can neither write nor read the metadata */
 970                 cmd->fail_io = true;
 971         }
 972 out:
 973         up_write(&cmd->lock);
 974 
 975         return r;
 976 }
 977 
 978 void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd)
 979 {
 980         down_write(&cmd->lock);
 981 
 982         spin_lock_irq(&cmd->bitmap_lock);
 983         cmd->read_only = 1;
 984         spin_unlock_irq(&cmd->bitmap_lock);
 985 
 986         if (!cmd->fail_io)
 987                 dm_bm_set_read_only(cmd->bm);
 988 
 989         up_write(&cmd->lock);
 990 }
 991 
 992 void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd)
 993 {
 994         down_write(&cmd->lock);
 995 
 996         spin_lock_irq(&cmd->bitmap_lock);
 997         cmd->read_only = 0;
 998         spin_unlock_irq(&cmd->bitmap_lock);
 999 
1000         if (!cmd->fail_io)
1001                 dm_bm_set_read_write(cmd->bm);
1002 
1003         up_write(&cmd->lock);
1004 }
1005 
1006 int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd,
1007                                            dm_block_t *result)
1008 {
1009         int r = -EINVAL;
1010 
1011         down_read(&cmd->lock);
1012 
1013         if (!cmd->fail_io)
1014                 r = dm_sm_get_nr_free(cmd->sm, result);
1015 
1016         up_read(&cmd->lock);
1017 
1018         return r;
1019 }
1020 
1021 int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd,
1022                                    dm_block_t *result)
1023 {
1024         int r = -EINVAL;
1025 
1026         down_read(&cmd->lock);
1027 
1028         if (!cmd->fail_io)
1029                 r = dm_sm_get_nr_blocks(cmd->sm, result);
1030 
1031         up_read(&cmd->lock);
1032 
1033         return r;
1034 }

/* [<][>][^][v][top][bottom][index][help] */