root/drivers/md/dm-thin-metadata.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. sb_prepare_for_write
  2. sb_check
  3. pack_block_time
  4. unpack_block_time
  5. data_block_inc
  6. data_block_dec
  7. data_block_equal
  8. subtree_inc
  9. subtree_dec
  10. subtree_equal
  11. pmd_write_lock_in_core
  12. pmd_write_lock
  13. pmd_write_unlock
  14. superblock_lock_zero
  15. superblock_lock
  16. __superblock_all_zeroes
  17. __setup_btree_details
  18. save_sm_roots
  19. copy_sm_roots
  20. __write_initial_superblock
  21. __format_metadata
  22. __check_incompat_features
  23. __open_metadata
  24. __open_or_format_metadata
  25. __create_persistent_data_objects
  26. __destroy_persistent_data_objects
  27. __begin_transaction
  28. __write_changed_details
  29. __commit_transaction
  30. __set_metadata_reserve
  31. dm_pool_metadata_open
  32. dm_pool_metadata_close
  33. __open_device
  34. __close_device
  35. __create_thin
  36. dm_pool_create_thin
  37. __set_snapshot_details
  38. __create_snap
  39. dm_pool_create_snap
  40. __delete_device
  41. dm_pool_delete_thin_device
  42. dm_pool_set_metadata_transaction_id
  43. dm_pool_get_metadata_transaction_id
  44. __reserve_metadata_snap
  45. dm_pool_reserve_metadata_snap
  46. __release_metadata_snap
  47. dm_pool_release_metadata_snap
  48. __get_metadata_snap
  49. dm_pool_get_metadata_snap
  50. dm_pool_open_thin_device
  51. dm_pool_close_thin_device
  52. dm_thin_dev_id
  53. __snapshotted_since
  54. unpack_lookup_result
  55. __find_block
  56. dm_thin_find_block
  57. __find_next_mapped_block
  58. __find_mapped_range
  59. dm_thin_find_mapped_range
  60. __insert
  61. dm_thin_insert_block
  62. __remove
  63. __remove_range
  64. dm_thin_remove_block
  65. dm_thin_remove_range
  66. dm_pool_block_is_shared
  67. dm_pool_inc_data_range
  68. dm_pool_dec_data_range
  69. dm_thin_changed_this_transaction
  70. dm_pool_changed_this_transaction
  71. dm_thin_aborted_changes
  72. dm_pool_alloc_data_block
  73. dm_pool_commit_metadata
  74. __set_abort_with_changes_flags
  75. dm_pool_abort_metadata
  76. dm_pool_get_free_block_count
  77. dm_pool_get_free_metadata_block_count
  78. dm_pool_get_metadata_dev_size
  79. dm_pool_get_data_dev_size
  80. dm_thin_get_mapped_count
  81. __highest_block
  82. dm_thin_get_highest_mapped_block
  83. __resize_space_map
  84. dm_pool_resize_data_dev
  85. dm_pool_resize_metadata_dev
  86. dm_pool_metadata_read_only
  87. dm_pool_metadata_read_write
  88. dm_pool_register_metadata_threshold
  89. dm_pool_register_pre_commit_callback
  90. dm_pool_metadata_set_needs_check
  91. dm_pool_metadata_needs_check
  92. dm_pool_issue_prefetches

   1 /*
   2  * Copyright (C) 2011-2012 Red Hat, Inc.
   3  *
   4  * This file is released under the GPL.
   5  */
   6 
   7 #include "dm-thin-metadata.h"
   8 #include "persistent-data/dm-btree.h"
   9 #include "persistent-data/dm-space-map.h"
  10 #include "persistent-data/dm-space-map-disk.h"
  11 #include "persistent-data/dm-transaction-manager.h"
  12 
  13 #include <linux/list.h>
  14 #include <linux/device-mapper.h>
  15 #include <linux/workqueue.h>
  16 
  17 /*--------------------------------------------------------------------------
  18  * As far as the metadata goes, there is:
  19  *
  20  * - A superblock in block zero, taking up fewer than 512 bytes for
  21  *   atomic writes.
  22  *
  23  * - A space map managing the metadata blocks.
  24  *
  25  * - A space map managing the data blocks.
  26  *
  27  * - A btree mapping our internal thin dev ids onto struct disk_device_details.
  28  *
  29  * - A hierarchical btree, with 2 levels which effectively maps (thin
  30  *   dev id, virtual block) -> block_time.  Block time is a 64-bit
  31  *   field holding the time in the low 24 bits, and block in the top 40
  32  *   bits.
  33  *
  34  * BTrees consist solely of btree_nodes, that fill a block.  Some are
  35  * internal nodes, as such their values are a __le64 pointing to other
  36  * nodes.  Leaf nodes can store data of any reasonable size (ie. much
  37  * smaller than the block size).  The nodes consist of the header,
  38  * followed by an array of keys, followed by an array of values.  We have
  39  * to binary search on the keys so they're all held together to help the
  40  * cpu cache.
  41  *
  42  * Space maps have 2 btrees:
  43  *
  44  * - One maps a uint64_t onto a struct index_entry.  Which points to a
  45  *   bitmap block, and has some details about how many free entries there
  46  *   are etc.
  47  *
  48  * - The bitmap blocks have a header (for the checksum).  Then the rest
  49  *   of the block is pairs of bits.  With the meaning being:
  50  *
  51  *   0 - ref count is 0
  52  *   1 - ref count is 1
  53  *   2 - ref count is 2
  54  *   3 - ref count is higher than 2
  55  *
  56  * - If the count is higher than 2 then the ref count is entered in a
  57  *   second btree that directly maps the block_address to a uint32_t ref
  58  *   count.
  59  *
  60  * The space map metadata variant doesn't have a bitmaps btree.  Instead
  61  * it has one single blocks worth of index_entries.  This avoids
  62  * recursive issues with the bitmap btree needing to allocate space in
  63  * order to insert.  With a small data block size such as 64k the
  64  * metadata can support data devices that are hundreds of terabytes.
  65  *
  66  * The space maps allocate space linearly from front to back.  Space that
  67  * is freed in a transaction is never recycled within that transaction.
  68  * To try and avoid fragmenting _free_ space the allocator always goes
  69  * back and fills in gaps.
  70  *
  71  * All metadata io is in THIN_METADATA_BLOCK_SIZE sized/aligned chunks
  72  * from the block manager.
  73  *--------------------------------------------------------------------------*/
  74 
  75 #define DM_MSG_PREFIX   "thin metadata"
  76 
     /*
      * Superblock identity.  THIN_SUPERBLOCK_MAGIC is written to the
      * superblock's magic field when the metadata is formatted and is
      * compared against that field by sb_check().  THIN_SUPERBLOCK_LOCATION
      * is the metadata block number the superblock is always read from and
      * written to.  THIN_VERSION is stored in the superblock's version field
      * at format time.
      *
      * SECTOR_TO_BLOCK_SHIFT converts a count of 512-byte sectors into a
      * count of metadata blocks (a right shift by 3, i.e. 8 sectors per
      * block); it is used when recording metadata_nr_blocks.
      */
  77 #define THIN_SUPERBLOCK_MAGIC 27022010
  78 #define THIN_SUPERBLOCK_LOCATION 0
  79 #define THIN_VERSION 2
  80 #define SECTOR_TO_BLOCK_SHIFT 3
  81 
  82 /*
  83  * For btree insert:
  84  *  3 for btree insert +
  85  *  2 for btree lookup used within space map
  86  * For btree remove:
  87  *  2 for shadow spine +
  88  *  4 for rebalance 3 child node
  89  */
     /* Passed to dm_block_manager_create() as its last argument. */
  90 #define THIN_MAX_CONCURRENT_LOCKS 6
  91 
  92 /* This should be plenty */
     /*
      * Size in bytes of each space map root buffer, both in the on-disk
      * superblock and in the in-core copies held by struct dm_pool_metadata.
      */
  93 #define SPACE_MAP_ROOT_SIZE 128
  94 
  95 /*
  96  * Little endian on-disk superblock and device details.
  97  */
  98 struct thin_disk_superblock {
             /*
              * On-disk layout of the superblock.  All multi-byte fields are
              * little endian and the structure is packed, so field order and
              * sizes are part of the on-disk format and must not change.
              * csum must stay first: the checksum is computed from the flags
              * field onwards.
              */
  99         __le32 csum;    /* Checksum of superblock except for this field. */
 100         __le32 flags;
 101         __le64 blocknr; /* This block number, dm_block_t. */
 102 
 103         __u8 uuid[16];  /* Zeroed when the metadata is formatted. */
 104         __le64 magic;   /* THIN_SUPERBLOCK_MAGIC; verified by sb_check(). */
 105         __le32 version; /* Set to THIN_VERSION at format time. */
 106         __le32 time;    /* Loaded into pmd->time at the start of a transaction. */
 107 
 108         __le64 trans_id;        /* Loaded into pmd->trans_id at the start of a transaction. */
 109 
 110         /*
 111          * Root held by userspace transactions.
 112          */
 113         __le64 held_root;
 114 
             /*
              * Opaque space map roots, copied verbatim from the in-core
              * buffers filled in by save_sm_roots().
              */
 115         __u8 data_space_map_root[SPACE_MAP_ROOT_SIZE];
 116         __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
 117 
 118         /*
 119          * 2-level btree mapping (dev_id, (dev block, time)) -> data block
 120          */
 121         __le64 data_mapping_root;
 122 
 123         /*
 124          * Device detail root mapping dev_id -> device_details
 125          */
 126         __le64 device_details_root;
 127 
 128         __le32 data_block_size;         /* In 512-byte sectors. */
 129 
 130         __le32 metadata_block_size;     /* In 512-byte sectors. */
 131         __le64 metadata_nr_blocks;      /* Metadata device size in metadata blocks, recorded at format time. */
 132 
             /*
              * Feature flags.  incompat_flags and compat_ro_flags are checked
              * against the supported masks by __check_incompat_features()
              * when existing metadata is opened.
              */
 133         __le32 compat_flags;
 134         __le32 compat_ro_flags;
 135         __le32 incompat_flags;
 136 } __packed;
 137 
 138 struct disk_device_details {
             /*
              * On-disk (little endian, packed) value stored in the device
              * details btree, keyed by thin device id.  It is filled in from
              * the matching fields of struct dm_thin_device by
              * __write_changed_details().
              */
 139         __le64 mapped_blocks;
 140         __le64 transaction_id;          /* When created. */
 141         __le32 creation_time;
 142         __le32 snapshotted_time;
 143 } __packed;
 144 
 145 struct dm_pool_metadata {
             /*
              * In-core state for one pool's metadata device: the persistent-data
              * objects built on top of it, the btree descriptions used to access
              * the mapping and device-details trees, and a cached copy of the
              * superblock fields that change from transaction to transaction.
              */
 146         struct hlist_node hash;
 147 
             /*
              * bdev is the metadata device.  bm is the block manager created on
              * it; tm/metadata_sm and data_sm are created (or opened) on top of
              * bm, and nb_tm is a non-blocking clone of tm.
              */
 148         struct block_device *bdev;
 149         struct dm_block_manager *bm;
 150         struct dm_space_map *metadata_sm;
 151         struct dm_space_map *data_sm;
 152         struct dm_transaction_manager *tm;
 153         struct dm_transaction_manager *nb_tm;
 154 
 155         /*
 156          * Two-level btree.
 157          * First level holds thin_dev_t.
 158          * Second level holds mappings.
 159          */
 160         struct dm_btree_info info;
 161 
 162         /*
 163          * Non-blocking version of the above.
 164          */
 165         struct dm_btree_info nb_info;
 166 
 167         /*
 168          * Just the top level for deleting whole devices.
 169          */
 170         struct dm_btree_info tl_info;
 171 
 172         /*
 173          * Just the bottom level for creating new devices.
 174          */
 175         struct dm_btree_info bl_info;
 176 
 177         /*
 178          * Describes the device details btree.
 179          */
 180         struct dm_btree_info details_info;
 181 
             /*
              * root_lock is taken for writing by pmd_write_lock() and
              * pmd_write_lock_in_core().  time, root, details_root, trans_id,
              * flags and data_block_size are reloaded from the superblock by
              * __begin_transaction().  thin_devices lists the in-core
              * struct dm_thin_device objects; it is walked at commit time to
              * write back changed device details.
              */
 182         struct rw_semaphore root_lock;
 183         uint32_t time;
 184         dm_block_t root;
 185         dm_block_t details_root;
 186         struct list_head thin_devices;
 187         uint64_t trans_id;
 188         unsigned long flags;
 189         sector_t data_block_size;
 190 
 191         /*
 192          * Pre-commit callback.
 193          *
 194          * This allows the thin provisioning target to run a callback before
 195          * the metadata are committed.
 196          */
 197         dm_pool_pre_commit_fn pre_commit_fn;
 198         void *pre_commit_context;
 199 
 200         /*
 201          * We reserve a section of the metadata for commit overhead.
 202          * All reported space does *not* include this.
 203          */
 204         dm_block_t metadata_reserve;
 205 
 206         /*
 207          * Set if a transaction has to be aborted but the attempt to roll back
 208          * to the previous (good) transaction failed.  The only pool metadata
 209          * operation possible in this state is the closing of the device.
 210          */
 211         bool fail_io:1;
 212 
 213         /*
 214          * Set once a thin-pool has been accessed through one of the interfaces
 215          * that imply the pool is in-service (e.g. thin devices created/deleted,
 216          * thin-pool message, metadata snapshots, etc).
 217          */
             /* Set by pmd_write_lock(); a commit is a no-op while it is clear. */
 218         bool in_service:1;
 219 
 220         /*
 221          * Reading the space map roots can fail, so we read it into these
 222          * buffers before the superblock is locked and updated.
 223          */
 224         __u8 data_space_map_root[SPACE_MAP_ROOT_SIZE];
 225         __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
 226 };
 227 
 228 struct dm_thin_device {
             /*
              * In-core state for one thin device.  Objects are linked on
              * pmd->thin_devices through 'list'.  The last four fields mirror
              * struct disk_device_details and are written to the details
              * btree, keyed by 'id', when 'changed' is set at commit time.
              */
 229         struct list_head list;
 230         struct dm_pool_metadata *pmd;
 231         dm_thin_id id;
 232 
             /*
              * If open_count is zero when the details are written back, the
              * object is unlinked and freed; otherwise 'changed' is cleared.
              */
 233         int open_count;
 234         bool changed:1;
 235         bool aborted_with_changes:1;
 236         uint64_t mapped_blocks;
 237         uint64_t transaction_id;
 238         uint32_t creation_time;
 239         uint32_t snapshotted_time;
 240 };
 241 
 242 /*----------------------------------------------------------------
 243  * superblock validator
 244  *--------------------------------------------------------------*/
 245 
 246 #define SUPERBLOCK_CSUM_XOR 160774
 247 
 248 static void sb_prepare_for_write(struct dm_block_validator *v,
 249                                  struct dm_block *b,
 250                                  size_t block_size)
 251 {
 252         struct thin_disk_superblock *disk_super = dm_block_data(b);
 253 
 254         disk_super->blocknr = cpu_to_le64(dm_block_location(b));
 255         disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
 256                                                       block_size - sizeof(__le32),
 257                                                       SUPERBLOCK_CSUM_XOR));
 258 }
 259 
 260 static int sb_check(struct dm_block_validator *v,
 261                     struct dm_block *b,
 262                     size_t block_size)
 263 {
 264         struct thin_disk_superblock *disk_super = dm_block_data(b);
 265         __le32 csum_le;
 266 
 267         if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
 268                 DMERR("sb_check failed: blocknr %llu: "
 269                       "wanted %llu", le64_to_cpu(disk_super->blocknr),
 270                       (unsigned long long)dm_block_location(b));
 271                 return -ENOTBLK;
 272         }
 273 
 274         if (le64_to_cpu(disk_super->magic) != THIN_SUPERBLOCK_MAGIC) {
 275                 DMERR("sb_check failed: magic %llu: "
 276                       "wanted %llu", le64_to_cpu(disk_super->magic),
 277                       (unsigned long long)THIN_SUPERBLOCK_MAGIC);
 278                 return -EILSEQ;
 279         }
 280 
 281         csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
 282                                              block_size - sizeof(__le32),
 283                                              SUPERBLOCK_CSUM_XOR));
 284         if (csum_le != disk_super->csum) {
 285                 DMERR("sb_check failed: csum %u: wanted %u",
 286                       le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
 287                 return -EILSEQ;
 288         }
 289 
 290         return 0;
 291 }
 292 
 293 static struct dm_block_validator sb_validator = {
             /*
              * Validator passed to the block manager whenever the superblock is
              * locked with validation: sb_prepare_for_write() stamps the block
              * number and checksum, sb_check() verifies block number, magic and
              * checksum.
              */
 294         .name = "superblock",
 295         .prepare_for_write = sb_prepare_for_write,
 296         .check = sb_check
 297 };
 298 
 299 /*----------------------------------------------------------------
 300  * Methods for the btree value types
 301  *--------------------------------------------------------------*/
 302 
 303 static uint64_t pack_block_time(dm_block_t b, uint32_t t)
 304 {
 305         return (b << 24) | t;
 306 }
 307 
 308 static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t)
 309 {
 310         *b = v >> 24;
 311         *t = v & ((1 << 24) - 1);
 312 }
 313 
 314 static void data_block_inc(void *context, const void *value_le)
 315 {
 316         struct dm_space_map *sm = context;
 317         __le64 v_le;
 318         uint64_t b;
 319         uint32_t t;
 320 
 321         memcpy(&v_le, value_le, sizeof(v_le));
 322         unpack_block_time(le64_to_cpu(v_le), &b, &t);
 323         dm_sm_inc_block(sm, b);
 324 }
 325 
 326 static void data_block_dec(void *context, const void *value_le)
 327 {
 328         struct dm_space_map *sm = context;
 329         __le64 v_le;
 330         uint64_t b;
 331         uint32_t t;
 332 
 333         memcpy(&v_le, value_le, sizeof(v_le));
 334         unpack_block_time(le64_to_cpu(v_le), &b, &t);
 335         dm_sm_dec_block(sm, b);
 336 }
 337 
 338 static int data_block_equal(void *context, const void *value1_le, const void *value2_le)
 339 {
 340         __le64 v1_le, v2_le;
 341         uint64_t b1, b2;
 342         uint32_t t;
 343 
 344         memcpy(&v1_le, value1_le, sizeof(v1_le));
 345         memcpy(&v2_le, value2_le, sizeof(v2_le));
 346         unpack_block_time(le64_to_cpu(v1_le), &b1, &t);
 347         unpack_block_time(le64_to_cpu(v2_le), &b2, &t);
 348 
 349         return b1 == b2;
 350 }
 351 
 352 static void subtree_inc(void *context, const void *value)
 353 {
 354         struct dm_btree_info *info = context;
 355         __le64 root_le;
 356         uint64_t root;
 357 
 358         memcpy(&root_le, value, sizeof(root_le));
 359         root = le64_to_cpu(root_le);
 360         dm_tm_inc(info->tm, root);
 361 }
 362 
 363 static void subtree_dec(void *context, const void *value)
 364 {
 365         struct dm_btree_info *info = context;
 366         __le64 root_le;
 367         uint64_t root;
 368 
 369         memcpy(&root_le, value, sizeof(root_le));
 370         root = le64_to_cpu(root_le);
 371         if (dm_btree_del(info, root))
 372                 DMERR("btree delete failed");
 373 }
 374 
 375 static int subtree_equal(void *context, const void *value1_le, const void *value2_le)
 376 {
 377         __le64 v1_le, v2_le;
 378         memcpy(&v1_le, value1_le, sizeof(v1_le));
 379         memcpy(&v2_le, value2_le, sizeof(v2_le));
 380 
 381         return v1_le == v2_le;
 382 }
 383 
 384 /*----------------------------------------------------------------*/
 385 
 386 /*
 387  * Variant that is used for in-core only changes or code that
 388  * shouldn't put the pool in service on its own (e.g. commit).
 389  */
 390 static inline void pmd_write_lock_in_core(struct dm_pool_metadata *pmd)
 391         __acquires(pmd->root_lock)
 392 {
 393         down_write(&pmd->root_lock);
 394 }
 395 
 396 static inline void pmd_write_lock(struct dm_pool_metadata *pmd)
 397 {
 398         pmd_write_lock_in_core(pmd);
 399         if (unlikely(!pmd->in_service))
 400                 pmd->in_service = true;
 401 }
 402 
 403 static inline void pmd_write_unlock(struct dm_pool_metadata *pmd)
 404         __releases(pmd->root_lock)
 405 {
 406         up_write(&pmd->root_lock);
 407 }
 408 
 409 /*----------------------------------------------------------------*/
 410 
 411 static int superblock_lock_zero(struct dm_pool_metadata *pmd,
 412                                 struct dm_block **sblock)
 413 {
 414         return dm_bm_write_lock_zero(pmd->bm, THIN_SUPERBLOCK_LOCATION,
 415                                      &sb_validator, sblock);
 416 }
 417 
 418 static int superblock_lock(struct dm_pool_metadata *pmd,
 419                            struct dm_block **sblock)
 420 {
 421         return dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
 422                                 &sb_validator, sblock);
 423 }
 424 
 425 static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result)
 426 {
 427         int r;
 428         unsigned i;
 429         struct dm_block *b;
 430         __le64 *data_le, zero = cpu_to_le64(0);
 431         unsigned block_size = dm_bm_block_size(bm) / sizeof(__le64);
 432 
 433         /*
 434          * We can't use a validator here - it may be all zeroes.
 435          */
 436         r = dm_bm_read_lock(bm, THIN_SUPERBLOCK_LOCATION, NULL, &b);
 437         if (r)
 438                 return r;
 439 
 440         data_le = dm_block_data(b);
 441         *result = 1;
 442         for (i = 0; i < block_size; i++) {
 443                 if (data_le[i] != zero) {
 444                         *result = 0;
 445                         break;
 446                 }
 447         }
 448 
 449         dm_bm_unlock(b);
 450 
 451         return 0;
 452 }
 453 
 454 static void __setup_btree_details(struct dm_pool_metadata *pmd)
 455 {
 456         pmd->info.tm = pmd->tm;
 457         pmd->info.levels = 2;
 458         pmd->info.value_type.context = pmd->data_sm;
 459         pmd->info.value_type.size = sizeof(__le64);
 460         pmd->info.value_type.inc = data_block_inc;
 461         pmd->info.value_type.dec = data_block_dec;
 462         pmd->info.value_type.equal = data_block_equal;
 463 
 464         memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info));
 465         pmd->nb_info.tm = pmd->nb_tm;
 466 
 467         pmd->tl_info.tm = pmd->tm;
 468         pmd->tl_info.levels = 1;
 469         pmd->tl_info.value_type.context = &pmd->bl_info;
 470         pmd->tl_info.value_type.size = sizeof(__le64);
 471         pmd->tl_info.value_type.inc = subtree_inc;
 472         pmd->tl_info.value_type.dec = subtree_dec;
 473         pmd->tl_info.value_type.equal = subtree_equal;
 474 
 475         pmd->bl_info.tm = pmd->tm;
 476         pmd->bl_info.levels = 1;
 477         pmd->bl_info.value_type.context = pmd->data_sm;
 478         pmd->bl_info.value_type.size = sizeof(__le64);
 479         pmd->bl_info.value_type.inc = data_block_inc;
 480         pmd->bl_info.value_type.dec = data_block_dec;
 481         pmd->bl_info.value_type.equal = data_block_equal;
 482 
 483         pmd->details_info.tm = pmd->tm;
 484         pmd->details_info.levels = 1;
 485         pmd->details_info.value_type.context = NULL;
 486         pmd->details_info.value_type.size = sizeof(struct disk_device_details);
 487         pmd->details_info.value_type.inc = NULL;
 488         pmd->details_info.value_type.dec = NULL;
 489         pmd->details_info.value_type.equal = NULL;
 490 }
 491 
 492 static int save_sm_roots(struct dm_pool_metadata *pmd)
 493 {
 494         int r;
 495         size_t len;
 496 
 497         r = dm_sm_root_size(pmd->metadata_sm, &len);
 498         if (r < 0)
 499                 return r;
 500 
 501         r = dm_sm_copy_root(pmd->metadata_sm, &pmd->metadata_space_map_root, len);
 502         if (r < 0)
 503                 return r;
 504 
 505         r = dm_sm_root_size(pmd->data_sm, &len);
 506         if (r < 0)
 507                 return r;
 508 
 509         return dm_sm_copy_root(pmd->data_sm, &pmd->data_space_map_root, len);
 510 }
 511 
 512 static void copy_sm_roots(struct dm_pool_metadata *pmd,
 513                           struct thin_disk_superblock *disk)
 514 {
 515         memcpy(&disk->metadata_space_map_root,
 516                &pmd->metadata_space_map_root,
 517                sizeof(pmd->metadata_space_map_root));
 518 
 519         memcpy(&disk->data_space_map_root,
 520                &pmd->data_space_map_root,
 521                sizeof(pmd->data_space_map_root));
 522 }
 523 
 524 static int __write_initial_superblock(struct dm_pool_metadata *pmd)
 525 {
 526         int r;
 527         struct dm_block *sblock;
 528         struct thin_disk_superblock *disk_super;
 529         sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT;
 530 
 531         if (bdev_size > THIN_METADATA_MAX_SECTORS)
 532                 bdev_size = THIN_METADATA_MAX_SECTORS;
 533 
 534         r = dm_sm_commit(pmd->data_sm);
 535         if (r < 0)
 536                 return r;
 537 
 538         r = dm_tm_pre_commit(pmd->tm);
 539         if (r < 0)
 540                 return r;
 541 
 542         r = save_sm_roots(pmd);
 543         if (r < 0)
 544                 return r;
 545 
 546         r = superblock_lock_zero(pmd, &sblock);
 547         if (r)
 548                 return r;
 549 
 550         disk_super = dm_block_data(sblock);
 551         disk_super->flags = 0;
 552         memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
 553         disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC);
 554         disk_super->version = cpu_to_le32(THIN_VERSION);
 555         disk_super->time = 0;
 556         disk_super->trans_id = 0;
 557         disk_super->held_root = 0;
 558 
 559         copy_sm_roots(pmd, disk_super);
 560 
 561         disk_super->data_mapping_root = cpu_to_le64(pmd->root);
 562         disk_super->device_details_root = cpu_to_le64(pmd->details_root);
 563         disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE);
 564         disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT);
 565         disk_super->data_block_size = cpu_to_le32(pmd->data_block_size);
 566 
 567         return dm_tm_commit(pmd->tm, sblock);
 568 }
 569 
 570 static int __format_metadata(struct dm_pool_metadata *pmd)
 571 {
 572         int r;
 573 
 574         r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION,
 575                                  &pmd->tm, &pmd->metadata_sm);
 576         if (r < 0) {
 577                 DMERR("tm_create_with_sm failed");
 578                 return r;
 579         }
 580 
 581         pmd->data_sm = dm_sm_disk_create(pmd->tm, 0);
 582         if (IS_ERR(pmd->data_sm)) {
 583                 DMERR("sm_disk_create failed");
 584                 r = PTR_ERR(pmd->data_sm);
 585                 goto bad_cleanup_tm;
 586         }
 587 
 588         pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm);
 589         if (!pmd->nb_tm) {
 590                 DMERR("could not create non-blocking clone tm");
 591                 r = -ENOMEM;
 592                 goto bad_cleanup_data_sm;
 593         }
 594 
 595         __setup_btree_details(pmd);
 596 
 597         r = dm_btree_empty(&pmd->info, &pmd->root);
 598         if (r < 0)
 599                 goto bad_cleanup_nb_tm;
 600 
 601         r = dm_btree_empty(&pmd->details_info, &pmd->details_root);
 602         if (r < 0) {
 603                 DMERR("couldn't create devices root");
 604                 goto bad_cleanup_nb_tm;
 605         }
 606 
 607         r = __write_initial_superblock(pmd);
 608         if (r)
 609                 goto bad_cleanup_nb_tm;
 610 
 611         return 0;
 612 
 613 bad_cleanup_nb_tm:
 614         dm_tm_destroy(pmd->nb_tm);
 615 bad_cleanup_data_sm:
 616         dm_sm_destroy(pmd->data_sm);
 617 bad_cleanup_tm:
 618         dm_tm_destroy(pmd->tm);
 619         dm_sm_destroy(pmd->metadata_sm);
 620 
 621         return r;
 622 }
 623 
 624 static int __check_incompat_features(struct thin_disk_superblock *disk_super,
 625                                      struct dm_pool_metadata *pmd)
 626 {
 627         uint32_t features;
 628 
 629         features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP;
 630         if (features) {
 631                 DMERR("could not access metadata due to unsupported optional features (%lx).",
 632                       (unsigned long)features);
 633                 return -EINVAL;
 634         }
 635 
 636         /*
 637          * Check for read-only metadata to skip the following RDWR checks.
 638          */
 639         if (get_disk_ro(pmd->bdev->bd_disk))
 640                 return 0;
 641 
 642         features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP;
 643         if (features) {
 644                 DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
 645                       (unsigned long)features);
 646                 return -EINVAL;
 647         }
 648 
 649         return 0;
 650 }
 651 
 652 static int __open_metadata(struct dm_pool_metadata *pmd)
 653 {
 654         int r;
 655         struct dm_block *sblock;
 656         struct thin_disk_superblock *disk_super;
 657 
 658         r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
 659                             &sb_validator, &sblock);
 660         if (r < 0) {
 661                 DMERR("couldn't read superblock");
 662                 return r;
 663         }
 664 
 665         disk_super = dm_block_data(sblock);
 666 
 667         /* Verify the data block size hasn't changed */
 668         if (le32_to_cpu(disk_super->data_block_size) != pmd->data_block_size) {
 669                 DMERR("changing the data block size (from %u to %llu) is not supported",
 670                       le32_to_cpu(disk_super->data_block_size),
 671                       (unsigned long long)pmd->data_block_size);
 672                 r = -EINVAL;
 673                 goto bad_unlock_sblock;
 674         }
 675 
 676         r = __check_incompat_features(disk_super, pmd);
 677         if (r < 0)
 678                 goto bad_unlock_sblock;
 679 
 680         r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION,
 681                                disk_super->metadata_space_map_root,
 682                                sizeof(disk_super->metadata_space_map_root),
 683                                &pmd->tm, &pmd->metadata_sm);
 684         if (r < 0) {
 685                 DMERR("tm_open_with_sm failed");
 686                 goto bad_unlock_sblock;
 687         }
 688 
 689         pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root,
 690                                        sizeof(disk_super->data_space_map_root));
 691         if (IS_ERR(pmd->data_sm)) {
 692                 DMERR("sm_disk_open failed");
 693                 r = PTR_ERR(pmd->data_sm);
 694                 goto bad_cleanup_tm;
 695         }
 696 
 697         pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm);
 698         if (!pmd->nb_tm) {
 699                 DMERR("could not create non-blocking clone tm");
 700                 r = -ENOMEM;
 701                 goto bad_cleanup_data_sm;
 702         }
 703 
 704         __setup_btree_details(pmd);
 705         dm_bm_unlock(sblock);
 706 
 707         return 0;
 708 
 709 bad_cleanup_data_sm:
 710         dm_sm_destroy(pmd->data_sm);
 711 bad_cleanup_tm:
 712         dm_tm_destroy(pmd->tm);
 713         dm_sm_destroy(pmd->metadata_sm);
 714 bad_unlock_sblock:
 715         dm_bm_unlock(sblock);
 716 
 717         return r;
 718 }
 719 
 720 static int __open_or_format_metadata(struct dm_pool_metadata *pmd, bool format_device)
 721 {
 722         int r, unformatted;
 723 
 724         r = __superblock_all_zeroes(pmd->bm, &unformatted);
 725         if (r)
 726                 return r;
 727 
 728         if (unformatted)
 729                 return format_device ? __format_metadata(pmd) : -EPERM;
 730 
 731         return __open_metadata(pmd);
 732 }
 733 
 734 static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool format_device)
 735 {
 736         int r;
 737 
 738         pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
 739                                           THIN_MAX_CONCURRENT_LOCKS);
 740         if (IS_ERR(pmd->bm)) {
 741                 DMERR("could not create block manager");
 742                 return PTR_ERR(pmd->bm);
 743         }
 744 
 745         r = __open_or_format_metadata(pmd, format_device);
 746         if (r)
 747                 dm_block_manager_destroy(pmd->bm);
 748 
 749         return r;
 750 }
 751 
 752 static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd)
 753 {
             /*
              * Tear down everything set up by __create_persistent_data_objects():
              * both space maps, then both transaction managers (the non-blocking
              * clone first), and finally the block manager they were created on.
              * The pointers in @pmd are not cleared here, so they must not be
              * used again until the objects have been recreated.
              */
 754         dm_sm_destroy(pmd->data_sm);
 755         dm_sm_destroy(pmd->metadata_sm);
 756         dm_tm_destroy(pmd->nb_tm);
 757         dm_tm_destroy(pmd->tm);
 758         dm_block_manager_destroy(pmd->bm);
 759 }
 760 
 761 static int __begin_transaction(struct dm_pool_metadata *pmd)
 762 {
 763         int r;
 764         struct thin_disk_superblock *disk_super;
 765         struct dm_block *sblock;
 766 
 767         /*
 768          * We re-read the superblock every time.  Shouldn't need to do this
 769          * really.
 770          */
 771         r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
 772                             &sb_validator, &sblock);
 773         if (r)
 774                 return r;
 775 
 776         disk_super = dm_block_data(sblock);
 777         pmd->time = le32_to_cpu(disk_super->time);
 778         pmd->root = le64_to_cpu(disk_super->data_mapping_root);
 779         pmd->details_root = le64_to_cpu(disk_super->device_details_root);
 780         pmd->trans_id = le64_to_cpu(disk_super->trans_id);
 781         pmd->flags = le32_to_cpu(disk_super->flags);
 782         pmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
 783 
 784         dm_bm_unlock(sblock);
 785         return 0;
 786 }
 787 
 788 static int __write_changed_details(struct dm_pool_metadata *pmd)
 789 {
 790         int r;
 791         struct dm_thin_device *td, *tmp;
 792         struct disk_device_details details;
 793         uint64_t key;
 794 
 795         list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) {
 796                 if (!td->changed)
 797                         continue;
 798 
 799                 key = td->id;
 800 
 801                 details.mapped_blocks = cpu_to_le64(td->mapped_blocks);
 802                 details.transaction_id = cpu_to_le64(td->transaction_id);
 803                 details.creation_time = cpu_to_le32(td->creation_time);
 804                 details.snapshotted_time = cpu_to_le32(td->snapshotted_time);
 805                 __dm_bless_for_disk(&details);
 806 
 807                 r = dm_btree_insert(&pmd->details_info, pmd->details_root,
 808                                     &key, &details, &pmd->details_root);
 809                 if (r)
 810                         return r;
 811 
 812                 if (td->open_count)
 813                         td->changed = 0;
 814                 else {
 815                         list_del(&td->list);
 816                         kfree(td);
 817                 }
 818         }
 819 
 820         return 0;
 821 }
 822 
 823 static int __commit_transaction(struct dm_pool_metadata *pmd)
 824 {
 825         int r;
 826         struct thin_disk_superblock *disk_super;
 827         struct dm_block *sblock;
 828 
 829         /*
 830          * We need to know if the thin_disk_superblock exceeds a 512-byte sector.
 831          */
 832         BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512);
 833         BUG_ON(!rwsem_is_locked(&pmd->root_lock));
 834 
 835         if (unlikely(!pmd->in_service))
 836                 return 0;
 837 
 838         if (pmd->pre_commit_fn) {
 839                 r = pmd->pre_commit_fn(pmd->pre_commit_context);
 840                 if (r < 0) {
 841                         DMERR("pre-commit callback failed");
 842                         return r;
 843                 }
 844         }
 845 
 846         r = __write_changed_details(pmd);
 847         if (r < 0)
 848                 return r;
 849 
 850         r = dm_sm_commit(pmd->data_sm);
 851         if (r < 0)
 852                 return r;
 853 
 854         r = dm_tm_pre_commit(pmd->tm);
 855         if (r < 0)
 856                 return r;
 857 
 858         r = save_sm_roots(pmd);
 859         if (r < 0)
 860                 return r;
 861 
 862         r = superblock_lock(pmd, &sblock);
 863         if (r)
 864                 return r;
 865 
 866         disk_super = dm_block_data(sblock);
 867         disk_super->time = cpu_to_le32(pmd->time);
 868         disk_super->data_mapping_root = cpu_to_le64(pmd->root);
 869         disk_super->device_details_root = cpu_to_le64(pmd->details_root);
 870         disk_super->trans_id = cpu_to_le64(pmd->trans_id);
 871         disk_super->flags = cpu_to_le32(pmd->flags);
 872 
 873         copy_sm_roots(pmd, disk_super);
 874 
 875         return dm_tm_commit(pmd->tm, sblock);
 876 }
 877 
 878 static void __set_metadata_reserve(struct dm_pool_metadata *pmd)
 879 {
 880         int r;
 881         dm_block_t total;
 882         dm_block_t max_blocks = 4096; /* 16M */
 883 
 884         r = dm_sm_get_nr_blocks(pmd->metadata_sm, &total);
 885         if (r) {
 886                 DMERR("could not get size of metadata device");
 887                 pmd->metadata_reserve = max_blocks;
 888         } else
 889                 pmd->metadata_reserve = min(max_blocks, div_u64(total, 10));
 890 }
 891 
 892 struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
 893                                                sector_t data_block_size,
 894                                                bool format_device)
 895 {
 896         int r;
 897         struct dm_pool_metadata *pmd;
 898 
 899         pmd = kmalloc(sizeof(*pmd), GFP_KERNEL);
 900         if (!pmd) {
 901                 DMERR("could not allocate metadata struct");
 902                 return ERR_PTR(-ENOMEM);
 903         }
 904 
 905         init_rwsem(&pmd->root_lock);
 906         pmd->time = 0;
 907         INIT_LIST_HEAD(&pmd->thin_devices);
 908         pmd->fail_io = false;
 909         pmd->in_service = false;
 910         pmd->bdev = bdev;
 911         pmd->data_block_size = data_block_size;
 912         pmd->pre_commit_fn = NULL;
 913         pmd->pre_commit_context = NULL;
 914 
 915         r = __create_persistent_data_objects(pmd, format_device);
 916         if (r) {
 917                 kfree(pmd);
 918                 return ERR_PTR(r);
 919         }
 920 
 921         r = __begin_transaction(pmd);
 922         if (r < 0) {
 923                 if (dm_pool_metadata_close(pmd) < 0)
 924                         DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
 925                 return ERR_PTR(r);
 926         }
 927 
 928         __set_metadata_reserve(pmd);
 929 
 930         return pmd;
 931 }
 932 
 933 int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
 934 {
 935         int r;
 936         unsigned open_devices = 0;
 937         struct dm_thin_device *td, *tmp;
 938 
 939         down_read(&pmd->root_lock);
 940         list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) {
 941                 if (td->open_count)
 942                         open_devices++;
 943                 else {
 944                         list_del(&td->list);
 945                         kfree(td);
 946                 }
 947         }
 948         up_read(&pmd->root_lock);
 949 
 950         if (open_devices) {
 951                 DMERR("attempt to close pmd when %u device(s) are still open",
 952                        open_devices);
 953                 return -EBUSY;
 954         }
 955 
 956         pmd_write_lock_in_core(pmd);
 957         if (!dm_bm_is_read_only(pmd->bm) && !pmd->fail_io) {
 958                 r = __commit_transaction(pmd);
 959                 if (r < 0)
 960                         DMWARN("%s: __commit_transaction() failed, error = %d",
 961                                __func__, r);
 962         }
 963         pmd_write_unlock(pmd);
 964         if (!pmd->fail_io)
 965                 __destroy_persistent_data_objects(pmd);
 966 
 967         kfree(pmd);
 968         return 0;
 969 }
 970 
 971 /*
 972  * __open_device: Returns @td corresponding to device with id @dev,
 973  * creating it if @create is set and incrementing @td->open_count.
 974  * On failure, @td is undefined.
 975  */
 976 static int __open_device(struct dm_pool_metadata *pmd,
 977                          dm_thin_id dev, int create,
 978                          struct dm_thin_device **td)
 979 {
 980         int r, changed = 0;
 981         struct dm_thin_device *td2;
 982         uint64_t key = dev;
 983         struct disk_device_details details_le;
 984 
 985         /*
 986          * If the device is already open, return it.
 987          */
 988         list_for_each_entry(td2, &pmd->thin_devices, list)
 989                 if (td2->id == dev) {
 990                         /*
 991                          * May not create an already-open device.
 992                          */
 993                         if (create)
 994                                 return -EEXIST;
 995 
 996                         td2->open_count++;
 997                         *td = td2;
 998                         return 0;
 999                 }
1000 
1001         /*
1002          * Check the device exists.
1003          */
1004         r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
1005                             &key, &details_le);
1006         if (r) {
1007                 if (r != -ENODATA || !create)
1008                         return r;
1009 
1010                 /*
1011                  * Create new device.
1012                  */
1013                 changed = 1;
1014                 details_le.mapped_blocks = 0;
1015                 details_le.transaction_id = cpu_to_le64(pmd->trans_id);
1016                 details_le.creation_time = cpu_to_le32(pmd->time);
1017                 details_le.snapshotted_time = cpu_to_le32(pmd->time);
1018         }
1019 
1020         *td = kmalloc(sizeof(**td), GFP_NOIO);
1021         if (!*td)
1022                 return -ENOMEM;
1023 
1024         (*td)->pmd = pmd;
1025         (*td)->id = dev;
1026         (*td)->open_count = 1;
1027         (*td)->changed = changed;
1028         (*td)->aborted_with_changes = false;
1029         (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks);
1030         (*td)->transaction_id = le64_to_cpu(details_le.transaction_id);
1031         (*td)->creation_time = le32_to_cpu(details_le.creation_time);
1032         (*td)->snapshotted_time = le32_to_cpu(details_le.snapshotted_time);
1033 
1034         list_add(&(*td)->list, &pmd->thin_devices);
1035 
1036         return 0;
1037 }
1038 
1039 static void __close_device(struct dm_thin_device *td)
1040 {
1041         --td->open_count;
1042 }
1043 
1044 static int __create_thin(struct dm_pool_metadata *pmd,
1045                          dm_thin_id dev)
1046 {
1047         int r;
1048         dm_block_t dev_root;
1049         uint64_t key = dev;
1050         struct disk_device_details details_le;
1051         struct dm_thin_device *td;
1052         __le64 value;
1053 
1054         r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
1055                             &key, &details_le);
1056         if (!r)
1057                 return -EEXIST;
1058 
1059         /*
1060          * Create an empty btree for the mappings.
1061          */
1062         r = dm_btree_empty(&pmd->bl_info, &dev_root);
1063         if (r)
1064                 return r;
1065 
1066         /*
1067          * Insert it into the main mapping tree.
1068          */
1069         value = cpu_to_le64(dev_root);
1070         __dm_bless_for_disk(&value);
1071         r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root);
1072         if (r) {
1073                 dm_btree_del(&pmd->bl_info, dev_root);
1074                 return r;
1075         }
1076 
1077         r = __open_device(pmd, dev, 1, &td);
1078         if (r) {
1079                 dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
1080                 dm_btree_del(&pmd->bl_info, dev_root);
1081                 return r;
1082         }
1083         __close_device(td);
1084 
1085         return r;
1086 }
1087 
1088 int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev)
1089 {
1090         int r = -EINVAL;
1091 
1092         pmd_write_lock(pmd);
1093         if (!pmd->fail_io)
1094                 r = __create_thin(pmd, dev);
1095         pmd_write_unlock(pmd);
1096 
1097         return r;
1098 }
1099 
1100 static int __set_snapshot_details(struct dm_pool_metadata *pmd,
1101                                   struct dm_thin_device *snap,
1102                                   dm_thin_id origin, uint32_t time)
1103 {
1104         int r;
1105         struct dm_thin_device *td;
1106 
1107         r = __open_device(pmd, origin, 0, &td);
1108         if (r)
1109                 return r;
1110 
1111         td->changed = 1;
1112         td->snapshotted_time = time;
1113 
1114         snap->mapped_blocks = td->mapped_blocks;
1115         snap->snapshotted_time = time;
1116         __close_device(td);
1117 
1118         return 0;
1119 }
1120 
1121 static int __create_snap(struct dm_pool_metadata *pmd,
1122                          dm_thin_id dev, dm_thin_id origin)
1123 {
1124         int r;
1125         dm_block_t origin_root;
1126         uint64_t key = origin, dev_key = dev;
1127         struct dm_thin_device *td;
1128         struct disk_device_details details_le;
1129         __le64 value;
1130 
1131         /* check this device is unused */
1132         r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
1133                             &dev_key, &details_le);
1134         if (!r)
1135                 return -EEXIST;
1136 
1137         /* find the mapping tree for the origin */
1138         r = dm_btree_lookup(&pmd->tl_info, pmd->root, &key, &value);
1139         if (r)
1140                 return r;
1141         origin_root = le64_to_cpu(value);
1142 
1143         /* clone the origin, an inc will do */
1144         dm_tm_inc(pmd->tm, origin_root);
1145 
1146         /* insert into the main mapping tree */
1147         value = cpu_to_le64(origin_root);
1148         __dm_bless_for_disk(&value);
1149         key = dev;
1150         r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root);
1151         if (r) {
1152                 dm_tm_dec(pmd->tm, origin_root);
1153                 return r;
1154         }
1155 
1156         pmd->time++;
1157 
1158         r = __open_device(pmd, dev, 1, &td);
1159         if (r)
1160                 goto bad;
1161 
1162         r = __set_snapshot_details(pmd, td, origin, pmd->time);
1163         __close_device(td);
1164 
1165         if (r)
1166                 goto bad;
1167 
1168         return 0;
1169 
1170 bad:
1171         dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
1172         dm_btree_remove(&pmd->details_info, pmd->details_root,
1173                         &key, &pmd->details_root);
1174         return r;
1175 }
1176 
1177 int dm_pool_create_snap(struct dm_pool_metadata *pmd,
1178                                  dm_thin_id dev,
1179                                  dm_thin_id origin)
1180 {
1181         int r = -EINVAL;
1182 
1183         pmd_write_lock(pmd);
1184         if (!pmd->fail_io)
1185                 r = __create_snap(pmd, dev, origin);
1186         pmd_write_unlock(pmd);
1187 
1188         return r;
1189 }
1190 
1191 static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev)
1192 {
1193         int r;
1194         uint64_t key = dev;
1195         struct dm_thin_device *td;
1196 
1197         /* TODO: failure should mark the transaction invalid */
1198         r = __open_device(pmd, dev, 0, &td);
1199         if (r)
1200                 return r;
1201 
1202         if (td->open_count > 1) {
1203                 __close_device(td);
1204                 return -EBUSY;
1205         }
1206 
1207         list_del(&td->list);
1208         kfree(td);
1209         r = dm_btree_remove(&pmd->details_info, pmd->details_root,
1210                             &key, &pmd->details_root);
1211         if (r)
1212                 return r;
1213 
1214         r = dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
1215         if (r)
1216                 return r;
1217 
1218         return 0;
1219 }
1220 
1221 int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd,
1222                                dm_thin_id dev)
1223 {
1224         int r = -EINVAL;
1225 
1226         pmd_write_lock(pmd);
1227         if (!pmd->fail_io)
1228                 r = __delete_device(pmd, dev);
1229         pmd_write_unlock(pmd);
1230 
1231         return r;
1232 }
1233 
1234 int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd,
1235                                         uint64_t current_id,
1236                                         uint64_t new_id)
1237 {
1238         int r = -EINVAL;
1239 
1240         pmd_write_lock(pmd);
1241 
1242         if (pmd->fail_io)
1243                 goto out;
1244 
1245         if (pmd->trans_id != current_id) {
1246                 DMERR("mismatched transaction id");
1247                 goto out;
1248         }
1249 
1250         pmd->trans_id = new_id;
1251         r = 0;
1252 
1253 out:
1254         pmd_write_unlock(pmd);
1255 
1256         return r;
1257 }
1258 
1259 int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
1260                                         uint64_t *result)
1261 {
1262         int r = -EINVAL;
1263 
1264         down_read(&pmd->root_lock);
1265         if (!pmd->fail_io) {
1266                 *result = pmd->trans_id;
1267                 r = 0;
1268         }
1269         up_read(&pmd->root_lock);
1270 
1271         return r;
1272 }
1273 
1274 static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
1275 {
1276         int r, inc;
1277         struct thin_disk_superblock *disk_super;
1278         struct dm_block *copy, *sblock;
1279         dm_block_t held_root;
1280 
1281         /*
1282          * We commit to ensure the btree roots which we increment in a
1283          * moment are up to date.
1284          */
1285         r = __commit_transaction(pmd);
1286         if (r < 0) {
1287                 DMWARN("%s: __commit_transaction() failed, error = %d",
1288                        __func__, r);
1289                 return r;
1290         }
1291 
1292         /*
1293          * Copy the superblock.
1294          */
1295         dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION);
1296         r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION,
1297                                &sb_validator, &copy, &inc);
1298         if (r)
1299                 return r;
1300 
1301         BUG_ON(!inc);
1302 
1303         held_root = dm_block_location(copy);
1304         disk_super = dm_block_data(copy);
1305 
1306         if (le64_to_cpu(disk_super->held_root)) {
1307                 DMWARN("Pool metadata snapshot already exists: release this before taking another.");
1308 
1309                 dm_tm_dec(pmd->tm, held_root);
1310                 dm_tm_unlock(pmd->tm, copy);
1311                 return -EBUSY;
1312         }
1313 
1314         /*
1315          * Wipe the spacemap since we're not publishing this.
1316          */
1317         memset(&disk_super->data_space_map_root, 0,
1318                sizeof(disk_super->data_space_map_root));
1319         memset(&disk_super->metadata_space_map_root, 0,
1320                sizeof(disk_super->metadata_space_map_root));
1321 
1322         /*
1323          * Increment the data structures that need to be preserved.
1324          */
1325         dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->data_mapping_root));
1326         dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->device_details_root));
1327         dm_tm_unlock(pmd->tm, copy);
1328 
1329         /*
1330          * Write the held root into the superblock.
1331          */
1332         r = superblock_lock(pmd, &sblock);
1333         if (r) {
1334                 dm_tm_dec(pmd->tm, held_root);
1335                 return r;
1336         }
1337 
1338         disk_super = dm_block_data(sblock);
1339         disk_super->held_root = cpu_to_le64(held_root);
1340         dm_bm_unlock(sblock);
1341         return 0;
1342 }
1343 
1344 int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd)
1345 {
1346         int r = -EINVAL;
1347 
1348         pmd_write_lock(pmd);
1349         if (!pmd->fail_io)
1350                 r = __reserve_metadata_snap(pmd);
1351         pmd_write_unlock(pmd);
1352 
1353         return r;
1354 }
1355 
1356 static int __release_metadata_snap(struct dm_pool_metadata *pmd)
1357 {
1358         int r;
1359         struct thin_disk_superblock *disk_super;
1360         struct dm_block *sblock, *copy;
1361         dm_block_t held_root;
1362 
1363         r = superblock_lock(pmd, &sblock);
1364         if (r)
1365                 return r;
1366 
1367         disk_super = dm_block_data(sblock);
1368         held_root = le64_to_cpu(disk_super->held_root);
1369         disk_super->held_root = cpu_to_le64(0);
1370 
1371         dm_bm_unlock(sblock);
1372 
1373         if (!held_root) {
1374                 DMWARN("No pool metadata snapshot found: nothing to release.");
1375                 return -EINVAL;
1376         }
1377 
1378         r = dm_tm_read_lock(pmd->tm, held_root, &sb_validator, &copy);
1379         if (r)
1380                 return r;
1381 
1382         disk_super = dm_block_data(copy);
1383         dm_btree_del(&pmd->info, le64_to_cpu(disk_super->data_mapping_root));
1384         dm_btree_del(&pmd->details_info, le64_to_cpu(disk_super->device_details_root));
1385         dm_sm_dec_block(pmd->metadata_sm, held_root);
1386 
1387         dm_tm_unlock(pmd->tm, copy);
1388 
1389         return 0;
1390 }
1391 
1392 int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd)
1393 {
1394         int r = -EINVAL;
1395 
1396         pmd_write_lock(pmd);
1397         if (!pmd->fail_io)
1398                 r = __release_metadata_snap(pmd);
1399         pmd_write_unlock(pmd);
1400 
1401         return r;
1402 }
1403 
1404 static int __get_metadata_snap(struct dm_pool_metadata *pmd,
1405                                dm_block_t *result)
1406 {
1407         int r;
1408         struct thin_disk_superblock *disk_super;
1409         struct dm_block *sblock;
1410 
1411         r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
1412                             &sb_validator, &sblock);
1413         if (r)
1414                 return r;
1415 
1416         disk_super = dm_block_data(sblock);
1417         *result = le64_to_cpu(disk_super->held_root);
1418 
1419         dm_bm_unlock(sblock);
1420 
1421         return 0;
1422 }
1423 
1424 int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
1425                               dm_block_t *result)
1426 {
1427         int r = -EINVAL;
1428 
1429         down_read(&pmd->root_lock);
1430         if (!pmd->fail_io)
1431                 r = __get_metadata_snap(pmd, result);
1432         up_read(&pmd->root_lock);
1433 
1434         return r;
1435 }
1436 
1437 int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev,
1438                              struct dm_thin_device **td)
1439 {
1440         int r = -EINVAL;
1441 
1442         pmd_write_lock_in_core(pmd);
1443         if (!pmd->fail_io)
1444                 r = __open_device(pmd, dev, 0, td);
1445         pmd_write_unlock(pmd);
1446 
1447         return r;
1448 }
1449 
1450 int dm_pool_close_thin_device(struct dm_thin_device *td)
1451 {
1452         pmd_write_lock_in_core(td->pmd);
1453         __close_device(td);
1454         pmd_write_unlock(td->pmd);
1455 
1456         return 0;
1457 }
1458 
1459 dm_thin_id dm_thin_dev_id(struct dm_thin_device *td)
1460 {
1461         return td->id;
1462 }
1463 
1464 /*
1465  * Check whether @time (of block creation) is older than @td's last snapshot.
1466  * If so then the associated block is shared with the last snapshot device.
1467  * Any block on a device created *after* the device last got snapshotted is
1468  * necessarily not shared.
1469  */
1470 static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time)
1471 {
1472         return td->snapshotted_time > time;
1473 }
1474 
1475 static void unpack_lookup_result(struct dm_thin_device *td, __le64 value,
1476                                  struct dm_thin_lookup_result *result)
1477 {
1478         uint64_t block_time = 0;
1479         dm_block_t exception_block;
1480         uint32_t exception_time;
1481 
1482         block_time = le64_to_cpu(value);
1483         unpack_block_time(block_time, &exception_block, &exception_time);
1484         result->block = exception_block;
1485         result->shared = __snapshotted_since(td, exception_time);
1486 }
1487 
1488 static int __find_block(struct dm_thin_device *td, dm_block_t block,
1489                         int can_issue_io, struct dm_thin_lookup_result *result)
1490 {
1491         int r;
1492         __le64 value;
1493         struct dm_pool_metadata *pmd = td->pmd;
1494         dm_block_t keys[2] = { td->id, block };
1495         struct dm_btree_info *info;
1496 
1497         if (can_issue_io) {
1498                 info = &pmd->info;
1499         } else
1500                 info = &pmd->nb_info;
1501 
1502         r = dm_btree_lookup(info, pmd->root, keys, &value);
1503         if (!r)
1504                 unpack_lookup_result(td, value, result);
1505 
1506         return r;
1507 }
1508 
1509 int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
1510                        int can_issue_io, struct dm_thin_lookup_result *result)
1511 {
1512         int r;
1513         struct dm_pool_metadata *pmd = td->pmd;
1514 
1515         down_read(&pmd->root_lock);
1516         if (pmd->fail_io) {
1517                 up_read(&pmd->root_lock);
1518                 return -EINVAL;
1519         }
1520 
1521         r = __find_block(td, block, can_issue_io, result);
1522 
1523         up_read(&pmd->root_lock);
1524         return r;
1525 }
1526 
1527 static int __find_next_mapped_block(struct dm_thin_device *td, dm_block_t block,
1528                                           dm_block_t *vblock,
1529                                           struct dm_thin_lookup_result *result)
1530 {
1531         int r;
1532         __le64 value;
1533         struct dm_pool_metadata *pmd = td->pmd;
1534         dm_block_t keys[2] = { td->id, block };
1535 
1536         r = dm_btree_lookup_next(&pmd->info, pmd->root, keys, vblock, &value);
1537         if (!r)
1538                 unpack_lookup_result(td, value, result);
1539 
1540         return r;
1541 }
1542 
1543 static int __find_mapped_range(struct dm_thin_device *td,
1544                                dm_block_t begin, dm_block_t end,
1545                                dm_block_t *thin_begin, dm_block_t *thin_end,
1546                                dm_block_t *pool_begin, bool *maybe_shared)
1547 {
1548         int r;
1549         dm_block_t pool_end;
1550         struct dm_thin_lookup_result lookup;
1551 
1552         if (end < begin)
1553                 return -ENODATA;
1554 
1555         r = __find_next_mapped_block(td, begin, &begin, &lookup);
1556         if (r)
1557                 return r;
1558 
1559         if (begin >= end)
1560                 return -ENODATA;
1561 
1562         *thin_begin = begin;
1563         *pool_begin = lookup.block;
1564         *maybe_shared = lookup.shared;
1565 
1566         begin++;
1567         pool_end = *pool_begin + 1;
1568         while (begin != end) {
1569                 r = __find_block(td, begin, true, &lookup);
1570                 if (r) {
1571                         if (r == -ENODATA)
1572                                 break;
1573                         else
1574                                 return r;
1575                 }
1576 
1577                 if ((lookup.block != pool_end) ||
1578                     (lookup.shared != *maybe_shared))
1579                         break;
1580 
1581                 pool_end++;
1582                 begin++;
1583         }
1584 
1585         *thin_end = begin;
1586         return 0;
1587 }
1588 
1589 int dm_thin_find_mapped_range(struct dm_thin_device *td,
1590                               dm_block_t begin, dm_block_t end,
1591                               dm_block_t *thin_begin, dm_block_t *thin_end,
1592                               dm_block_t *pool_begin, bool *maybe_shared)
1593 {
1594         int r = -EINVAL;
1595         struct dm_pool_metadata *pmd = td->pmd;
1596 
1597         down_read(&pmd->root_lock);
1598         if (!pmd->fail_io) {
1599                 r = __find_mapped_range(td, begin, end, thin_begin, thin_end,
1600                                         pool_begin, maybe_shared);
1601         }
1602         up_read(&pmd->root_lock);
1603 
1604         return r;
1605 }
1606 
1607 static int __insert(struct dm_thin_device *td, dm_block_t block,
1608                     dm_block_t data_block)
1609 {
1610         int r, inserted;
1611         __le64 value;
1612         struct dm_pool_metadata *pmd = td->pmd;
1613         dm_block_t keys[2] = { td->id, block };
1614 
1615         value = cpu_to_le64(pack_block_time(data_block, pmd->time));
1616         __dm_bless_for_disk(&value);
1617 
1618         r = dm_btree_insert_notify(&pmd->info, pmd->root, keys, &value,
1619                                    &pmd->root, &inserted);
1620         if (r)
1621                 return r;
1622 
1623         td->changed = 1;
1624         if (inserted)
1625                 td->mapped_blocks++;
1626 
1627         return 0;
1628 }
1629 
1630 int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block,
1631                          dm_block_t data_block)
1632 {
1633         int r = -EINVAL;
1634 
1635         pmd_write_lock(td->pmd);
1636         if (!td->pmd->fail_io)
1637                 r = __insert(td, block, data_block);
1638         pmd_write_unlock(td->pmd);
1639 
1640         return r;
1641 }
1642 
1643 static int __remove(struct dm_thin_device *td, dm_block_t block)
1644 {
1645         int r;
1646         struct dm_pool_metadata *pmd = td->pmd;
1647         dm_block_t keys[2] = { td->id, block };
1648 
1649         r = dm_btree_remove(&pmd->info, pmd->root, keys, &pmd->root);
1650         if (r)
1651                 return r;
1652 
1653         td->mapped_blocks--;
1654         td->changed = 1;
1655 
1656         return 0;
1657 }
1658 
1659 static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_t end)
1660 {
1661         int r;
1662         unsigned count, total_count = 0;
1663         struct dm_pool_metadata *pmd = td->pmd;
1664         dm_block_t keys[1] = { td->id };
1665         __le64 value;
1666         dm_block_t mapping_root;
1667 
1668         /*
1669          * Find the mapping tree
1670          */
1671         r = dm_btree_lookup(&pmd->tl_info, pmd->root, keys, &value);
1672         if (r)
1673                 return r;
1674 
1675         /*
1676          * Remove from the mapping tree, taking care to inc the
1677          * ref count so it doesn't get deleted.
1678          */
1679         mapping_root = le64_to_cpu(value);
1680         dm_tm_inc(pmd->tm, mapping_root);
1681         r = dm_btree_remove(&pmd->tl_info, pmd->root, keys, &pmd->root);
1682         if (r)
1683                 return r;
1684 
1685         /*
1686          * Remove leaves stops at the first unmapped entry, so we have to
1687          * loop round finding mapped ranges.
1688          */
1689         while (begin < end) {
1690                 r = dm_btree_lookup_next(&pmd->bl_info, mapping_root, &begin, &begin, &value);
1691                 if (r == -ENODATA)
1692                         break;
1693 
1694                 if (r)
1695                         return r;
1696 
1697                 if (begin >= end)
1698                         break;
1699 
1700                 r = dm_btree_remove_leaves(&pmd->bl_info, mapping_root, &begin, end, &mapping_root, &count);
1701                 if (r)
1702                         return r;
1703 
1704                 total_count += count;
1705         }
1706 
1707         td->mapped_blocks -= total_count;
1708         td->changed = 1;
1709 
1710         /*
1711          * Reinsert the mapping tree.
1712          */
1713         value = cpu_to_le64(mapping_root);
1714         __dm_bless_for_disk(&value);
1715         return dm_btree_insert(&pmd->tl_info, pmd->root, keys, &value, &pmd->root);
1716 }
1717 
1718 int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block)
1719 {
1720         int r = -EINVAL;
1721 
1722         pmd_write_lock(td->pmd);
1723         if (!td->pmd->fail_io)
1724                 r = __remove(td, block);
1725         pmd_write_unlock(td->pmd);
1726 
1727         return r;
1728 }
1729 
1730 int dm_thin_remove_range(struct dm_thin_device *td,
1731                          dm_block_t begin, dm_block_t end)
1732 {
1733         int r = -EINVAL;
1734 
1735         pmd_write_lock(td->pmd);
1736         if (!td->pmd->fail_io)
1737                 r = __remove_range(td, begin, end);
1738         pmd_write_unlock(td->pmd);
1739 
1740         return r;
1741 }
1742 
1743 int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result)
1744 {
1745         int r;
1746         uint32_t ref_count;
1747 
1748         down_read(&pmd->root_lock);
1749         r = dm_sm_get_count(pmd->data_sm, b, &ref_count);
1750         if (!r)
1751                 *result = (ref_count > 1);
1752         up_read(&pmd->root_lock);
1753 
1754         return r;
1755 }
1756 
1757 int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
1758 {
1759         int r = 0;
1760 
1761         pmd_write_lock(pmd);
1762         for (; b != e; b++) {
1763                 r = dm_sm_inc_block(pmd->data_sm, b);
1764                 if (r)
1765                         break;
1766         }
1767         pmd_write_unlock(pmd);
1768 
1769         return r;
1770 }
1771 
1772 int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
1773 {
1774         int r = 0;
1775 
1776         pmd_write_lock(pmd);
1777         for (; b != e; b++) {
1778                 r = dm_sm_dec_block(pmd->data_sm, b);
1779                 if (r)
1780                         break;
1781         }
1782         pmd_write_unlock(pmd);
1783 
1784         return r;
1785 }
1786 
1787 bool dm_thin_changed_this_transaction(struct dm_thin_device *td)
1788 {
1789         int r;
1790 
1791         down_read(&td->pmd->root_lock);
1792         r = td->changed;
1793         up_read(&td->pmd->root_lock);
1794 
1795         return r;
1796 }
1797 
1798 bool dm_pool_changed_this_transaction(struct dm_pool_metadata *pmd)
1799 {
1800         bool r = false;
1801         struct dm_thin_device *td, *tmp;
1802 
1803         down_read(&pmd->root_lock);
1804         list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) {
1805                 if (td->changed) {
1806                         r = td->changed;
1807                         break;
1808                 }
1809         }
1810         up_read(&pmd->root_lock);
1811 
1812         return r;
1813 }
1814 
1815 bool dm_thin_aborted_changes(struct dm_thin_device *td)
1816 {
1817         bool r;
1818 
1819         down_read(&td->pmd->root_lock);
1820         r = td->aborted_with_changes;
1821         up_read(&td->pmd->root_lock);
1822 
1823         return r;
1824 }
1825 
1826 int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result)
1827 {
1828         int r = -EINVAL;
1829 
1830         pmd_write_lock(pmd);
1831         if (!pmd->fail_io)
1832                 r = dm_sm_new_block(pmd->data_sm, result);
1833         pmd_write_unlock(pmd);
1834 
1835         return r;
1836 }
1837 
1838 int dm_pool_commit_metadata(struct dm_pool_metadata *pmd)
1839 {
1840         int r = -EINVAL;
1841 
1842         /*
1843          * Care is taken to not have commit be what
1844          * triggers putting the thin-pool in-service.
1845          */
1846         pmd_write_lock_in_core(pmd);
1847         if (pmd->fail_io)
1848                 goto out;
1849 
1850         r = __commit_transaction(pmd);
1851         if (r < 0)
1852                 goto out;
1853 
1854         /*
1855          * Open the next transaction.
1856          */
1857         r = __begin_transaction(pmd);
1858 out:
1859         pmd_write_unlock(pmd);
1860         return r;
1861 }
1862 
1863 static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd)
1864 {
1865         struct dm_thin_device *td;
1866 
1867         list_for_each_entry(td, &pmd->thin_devices, list)
1868                 td->aborted_with_changes = td->changed;
1869 }
1870 
1871 int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
1872 {
1873         int r = -EINVAL;
1874 
1875         pmd_write_lock(pmd);
1876         if (pmd->fail_io)
1877                 goto out;
1878 
1879         __set_abort_with_changes_flags(pmd);
1880         __destroy_persistent_data_objects(pmd);
1881         r = __create_persistent_data_objects(pmd, false);
1882         if (r)
1883                 pmd->fail_io = true;
1884 
1885 out:
1886         pmd_write_unlock(pmd);
1887 
1888         return r;
1889 }
1890 
1891 int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result)
1892 {
1893         int r = -EINVAL;
1894 
1895         down_read(&pmd->root_lock);
1896         if (!pmd->fail_io)
1897                 r = dm_sm_get_nr_free(pmd->data_sm, result);
1898         up_read(&pmd->root_lock);
1899 
1900         return r;
1901 }
1902 
1903 int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd,
1904                                           dm_block_t *result)
1905 {
1906         int r = -EINVAL;
1907 
1908         down_read(&pmd->root_lock);
1909         if (!pmd->fail_io)
1910                 r = dm_sm_get_nr_free(pmd->metadata_sm, result);
1911 
1912         if (!r) {
1913                 if (*result < pmd->metadata_reserve)
1914                         *result = 0;
1915                 else
1916                         *result -= pmd->metadata_reserve;
1917         }
1918         up_read(&pmd->root_lock);
1919 
1920         return r;
1921 }
1922 
1923 int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd,
1924                                   dm_block_t *result)
1925 {
1926         int r = -EINVAL;
1927 
1928         down_read(&pmd->root_lock);
1929         if (!pmd->fail_io)
1930                 r = dm_sm_get_nr_blocks(pmd->metadata_sm, result);
1931         up_read(&pmd->root_lock);
1932 
1933         return r;
1934 }
1935 
1936 int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result)
1937 {
1938         int r = -EINVAL;
1939 
1940         down_read(&pmd->root_lock);
1941         if (!pmd->fail_io)
1942                 r = dm_sm_get_nr_blocks(pmd->data_sm, result);
1943         up_read(&pmd->root_lock);
1944 
1945         return r;
1946 }
1947 
1948 int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result)
1949 {
1950         int r = -EINVAL;
1951         struct dm_pool_metadata *pmd = td->pmd;
1952 
1953         down_read(&pmd->root_lock);
1954         if (!pmd->fail_io) {
1955                 *result = td->mapped_blocks;
1956                 r = 0;
1957         }
1958         up_read(&pmd->root_lock);
1959 
1960         return r;
1961 }
1962 
1963 static int __highest_block(struct dm_thin_device *td, dm_block_t *result)
1964 {
1965         int r;
1966         __le64 value_le;
1967         dm_block_t thin_root;
1968         struct dm_pool_metadata *pmd = td->pmd;
1969 
1970         r = dm_btree_lookup(&pmd->tl_info, pmd->root, &td->id, &value_le);
1971         if (r)
1972                 return r;
1973 
1974         thin_root = le64_to_cpu(value_le);
1975 
1976         return dm_btree_find_highest_key(&pmd->bl_info, thin_root, result);
1977 }
1978 
1979 int dm_thin_get_highest_mapped_block(struct dm_thin_device *td,
1980                                      dm_block_t *result)
1981 {
1982         int r = -EINVAL;
1983         struct dm_pool_metadata *pmd = td->pmd;
1984 
1985         down_read(&pmd->root_lock);
1986         if (!pmd->fail_io)
1987                 r = __highest_block(td, result);
1988         up_read(&pmd->root_lock);
1989 
1990         return r;
1991 }
1992 
1993 static int __resize_space_map(struct dm_space_map *sm, dm_block_t new_count)
1994 {
1995         int r;
1996         dm_block_t old_count;
1997 
1998         r = dm_sm_get_nr_blocks(sm, &old_count);
1999         if (r)
2000                 return r;
2001 
2002         if (new_count == old_count)
2003                 return 0;
2004 
2005         if (new_count < old_count) {
2006                 DMERR("cannot reduce size of space map");
2007                 return -EINVAL;
2008         }
2009 
2010         return dm_sm_extend(sm, new_count - old_count);
2011 }
2012 
2013 int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
2014 {
2015         int r = -EINVAL;
2016 
2017         pmd_write_lock(pmd);
2018         if (!pmd->fail_io)
2019                 r = __resize_space_map(pmd->data_sm, new_count);
2020         pmd_write_unlock(pmd);
2021 
2022         return r;
2023 }
2024 
2025 int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
2026 {
2027         int r = -EINVAL;
2028 
2029         pmd_write_lock(pmd);
2030         if (!pmd->fail_io) {
2031                 r = __resize_space_map(pmd->metadata_sm, new_count);
2032                 if (!r)
2033                         __set_metadata_reserve(pmd);
2034         }
2035         pmd_write_unlock(pmd);
2036 
2037         return r;
2038 }
2039 
2040 void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd)
2041 {
2042         pmd_write_lock_in_core(pmd);
2043         dm_bm_set_read_only(pmd->bm);
2044         pmd_write_unlock(pmd);
2045 }
2046 
2047 void dm_pool_metadata_read_write(struct dm_pool_metadata *pmd)
2048 {
2049         pmd_write_lock_in_core(pmd);
2050         dm_bm_set_read_write(pmd->bm);
2051         pmd_write_unlock(pmd);
2052 }
2053 
2054 int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
2055                                         dm_block_t threshold,
2056                                         dm_sm_threshold_fn fn,
2057                                         void *context)
2058 {
2059         int r;
2060 
2061         pmd_write_lock_in_core(pmd);
2062         r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
2063         pmd_write_unlock(pmd);
2064 
2065         return r;
2066 }
2067 
2068 void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd,
2069                                           dm_pool_pre_commit_fn fn,
2070                                           void *context)
2071 {
2072         pmd_write_lock_in_core(pmd);
2073         pmd->pre_commit_fn = fn;
2074         pmd->pre_commit_context = context;
2075         pmd_write_unlock(pmd);
2076 }
2077 
2078 int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd)
2079 {
2080         int r = -EINVAL;
2081         struct dm_block *sblock;
2082         struct thin_disk_superblock *disk_super;
2083 
2084         pmd_write_lock(pmd);
2085         if (pmd->fail_io)
2086                 goto out;
2087 
2088         pmd->flags |= THIN_METADATA_NEEDS_CHECK_FLAG;
2089 
2090         r = superblock_lock(pmd, &sblock);
2091         if (r) {
2092                 DMERR("couldn't lock superblock");
2093                 goto out;
2094         }
2095 
2096         disk_super = dm_block_data(sblock);
2097         disk_super->flags = cpu_to_le32(pmd->flags);
2098 
2099         dm_bm_unlock(sblock);
2100 out:
2101         pmd_write_unlock(pmd);
2102         return r;
2103 }
2104 
2105 bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd)
2106 {
2107         bool needs_check;
2108 
2109         down_read(&pmd->root_lock);
2110         needs_check = pmd->flags & THIN_METADATA_NEEDS_CHECK_FLAG;
2111         up_read(&pmd->root_lock);
2112 
2113         return needs_check;
2114 }
2115 
2116 void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd)
2117 {
2118         down_read(&pmd->root_lock);
2119         if (!pmd->fail_io)
2120                 dm_tm_issue_prefetches(pmd->tm);
2121         up_read(&pmd->root_lock);
2122 }

/* [<][>][^][v][top][bottom][index][help] */