This source file includes following definitions.
- node_shift
 
- node_copy
 
- delete_at
 
- merge_threshold
 
- init_child
 
- exit_child
 
- shift
 
- __rebalance2
 
- rebalance2
 
- delete_center_node
 
- redistribute3
 
- __rebalance3
 
- rebalance3
 
- rebalance_children
 
- do_leaf
 
- remove_raw
 
- dm_btree_remove
 
- remove_nearest
 
- remove_one
 
- dm_btree_remove_leaves
 
   1 
   2 
   3 
   4 
   5 
   6 
   7 #include "dm-btree.h"
   8 #include "dm-btree-internal.h"
   9 #include "dm-transaction-manager.h"
  10 
  11 #include <linux/export.h>
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 
  26 
  27 
  28 
  29 
  30 
  31 
  32 
  33 
  34 
  35 
  36 
  37 
  38 
  39 
  40 
  41 
  42 
  43 
  44 
  45 
  46 
  47 
  48 
  49 
  50 
  51 
  52 
  53 
  54 
  55 
  56 static void node_shift(struct btree_node *n, int shift)
  57 {
  58         uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
  59         uint32_t value_size = le32_to_cpu(n->header.value_size);
  60 
  61         if (shift < 0) {
  62                 shift = -shift;
  63                 BUG_ON(shift > nr_entries);
  64                 BUG_ON((void *) key_ptr(n, shift) >= value_ptr(n, shift));
  65                 memmove(key_ptr(n, 0),
  66                         key_ptr(n, shift),
  67                         (nr_entries - shift) * sizeof(__le64));
  68                 memmove(value_ptr(n, 0),
  69                         value_ptr(n, shift),
  70                         (nr_entries - shift) * value_size);
  71         } else {
  72                 BUG_ON(nr_entries + shift > le32_to_cpu(n->header.max_entries));
  73                 memmove(key_ptr(n, shift),
  74                         key_ptr(n, 0),
  75                         nr_entries * sizeof(__le64));
  76                 memmove(value_ptr(n, shift),
  77                         value_ptr(n, 0),
  78                         nr_entries * value_size);
  79         }
  80 }
  81 
  82 static void node_copy(struct btree_node *left, struct btree_node *right, int shift)
  83 {
  84         uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
  85         uint32_t value_size = le32_to_cpu(left->header.value_size);
  86         BUG_ON(value_size != le32_to_cpu(right->header.value_size));
  87 
  88         if (shift < 0) {
  89                 shift = -shift;
  90                 BUG_ON(nr_left + shift > le32_to_cpu(left->header.max_entries));
  91                 memcpy(key_ptr(left, nr_left),
  92                        key_ptr(right, 0),
  93                        shift * sizeof(__le64));
  94                 memcpy(value_ptr(left, nr_left),
  95                        value_ptr(right, 0),
  96                        shift * value_size);
  97         } else {
  98                 BUG_ON(shift > le32_to_cpu(right->header.max_entries));
  99                 memcpy(key_ptr(right, 0),
 100                        key_ptr(left, nr_left - shift),
 101                        shift * sizeof(__le64));
 102                 memcpy(value_ptr(right, 0),
 103                        value_ptr(left, nr_left - shift),
 104                        shift * value_size);
 105         }
 106 }
 107 
 108 
 109 
 110 
 111 static void delete_at(struct btree_node *n, unsigned index)
 112 {
 113         unsigned nr_entries = le32_to_cpu(n->header.nr_entries);
 114         unsigned nr_to_copy = nr_entries - (index + 1);
 115         uint32_t value_size = le32_to_cpu(n->header.value_size);
 116         BUG_ON(index >= nr_entries);
 117 
 118         if (nr_to_copy) {
 119                 memmove(key_ptr(n, index),
 120                         key_ptr(n, index + 1),
 121                         nr_to_copy * sizeof(__le64));
 122 
 123                 memmove(value_ptr(n, index),
 124                         value_ptr(n, index + 1),
 125                         nr_to_copy * value_size);
 126         }
 127 
 128         n->header.nr_entries = cpu_to_le32(nr_entries - 1);
 129 }
 130 
 131 static unsigned merge_threshold(struct btree_node *n)
 132 {
 133         return le32_to_cpu(n->header.max_entries) / 3;
 134 }
 135 
 136 struct child {
 137         unsigned index;
 138         struct dm_block *block;
 139         struct btree_node *n;
 140 };
 141 
 142 static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
 143                       struct btree_node *parent,
 144                       unsigned index, struct child *result)
 145 {
 146         int r, inc;
 147         dm_block_t root;
 148 
 149         result->index = index;
 150         root = value64(parent, index);
 151 
 152         r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
 153                                &result->block, &inc);
 154         if (r)
 155                 return r;
 156 
 157         result->n = dm_block_data(result->block);
 158 
 159         if (inc)
 160                 inc_children(info->tm, result->n, vt);
 161 
 162         *((__le64 *) value_ptr(parent, index)) =
 163                 cpu_to_le64(dm_block_location(result->block));
 164 
 165         return 0;
 166 }
 167 
 168 static void exit_child(struct dm_btree_info *info, struct child *c)
 169 {
 170         dm_tm_unlock(info->tm, c->block);
 171 }
 172 
 173 static void shift(struct btree_node *left, struct btree_node *right, int count)
 174 {
 175         uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
 176         uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
 177         uint32_t max_entries = le32_to_cpu(left->header.max_entries);
 178         uint32_t r_max_entries = le32_to_cpu(right->header.max_entries);
 179 
 180         BUG_ON(max_entries != r_max_entries);
 181         BUG_ON(nr_left - count > max_entries);
 182         BUG_ON(nr_right + count > max_entries);
 183 
 184         if (!count)
 185                 return;
 186 
 187         if (count > 0) {
 188                 node_shift(right, count);
 189                 node_copy(left, right, count);
 190         } else {
 191                 node_copy(left, right, count);
 192                 node_shift(right, count);
 193         }
 194 
 195         left->header.nr_entries = cpu_to_le32(nr_left - count);
 196         right->header.nr_entries = cpu_to_le32(nr_right + count);
 197 }
 198 
 199 static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
 200                          struct child *l, struct child *r)
 201 {
 202         struct btree_node *left = l->n;
 203         struct btree_node *right = r->n;
 204         uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
 205         uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
 206         
 207 
 208 
 209 
 210 
 211 
 212         unsigned int threshold = 2 * (merge_threshold(left) + 1);
 213 
 214         if (nr_left + nr_right < threshold) {
 215                 
 216 
 217 
 218                 node_copy(left, right, -nr_right);
 219                 left->header.nr_entries = cpu_to_le32(nr_left + nr_right);
 220                 delete_at(parent, r->index);
 221 
 222                 
 223 
 224 
 225 
 226                 dm_tm_dec(info->tm, dm_block_location(r->block));
 227         } else {
 228                 
 229 
 230 
 231                 unsigned target_left = (nr_left + nr_right) / 2;
 232                 shift(left, right, nr_left - target_left);
 233                 *key_ptr(parent, r->index) = right->keys[0];
 234         }
 235 }
 236 
 237 static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
 238                       struct dm_btree_value_type *vt, unsigned left_index)
 239 {
 240         int r;
 241         struct btree_node *parent;
 242         struct child left, right;
 243 
 244         parent = dm_block_data(shadow_current(s));
 245 
 246         r = init_child(info, vt, parent, left_index, &left);
 247         if (r)
 248                 return r;
 249 
 250         r = init_child(info, vt, parent, left_index + 1, &right);
 251         if (r) {
 252                 exit_child(info, &left);
 253                 return r;
 254         }
 255 
 256         __rebalance2(info, parent, &left, &right);
 257 
 258         exit_child(info, &left);
 259         exit_child(info, &right);
 260 
 261         return 0;
 262 }
 263 
 264 
 265 
 266 
 267 
 268 
 269 static void delete_center_node(struct dm_btree_info *info, struct btree_node *parent,
 270                                struct child *l, struct child *c, struct child *r,
 271                                struct btree_node *left, struct btree_node *center, struct btree_node *right,
 272                                uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
 273 {
 274         uint32_t max_entries = le32_to_cpu(left->header.max_entries);
 275         unsigned shift = min(max_entries - nr_left, nr_center);
 276 
 277         BUG_ON(nr_left + shift > max_entries);
 278         node_copy(left, center, -shift);
 279         left->header.nr_entries = cpu_to_le32(nr_left + shift);
 280 
 281         if (shift != nr_center) {
 282                 shift = nr_center - shift;
 283                 BUG_ON((nr_right + shift) > max_entries);
 284                 node_shift(right, shift);
 285                 node_copy(center, right, shift);
 286                 right->header.nr_entries = cpu_to_le32(nr_right + shift);
 287         }
 288         *key_ptr(parent, r->index) = right->keys[0];
 289 
 290         delete_at(parent, c->index);
 291         r->index--;
 292 
 293         dm_tm_dec(info->tm, dm_block_location(c->block));
 294         __rebalance2(info, parent, l, r);
 295 }
 296 
 297 
 298 
 299 
 300 static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
 301                           struct child *l, struct child *c, struct child *r,
 302                           struct btree_node *left, struct btree_node *center, struct btree_node *right,
 303                           uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
 304 {
 305         int s;
 306         uint32_t max_entries = le32_to_cpu(left->header.max_entries);
 307         unsigned total = nr_left + nr_center + nr_right;
 308         unsigned target_right = total / 3;
 309         unsigned remainder = (target_right * 3) != total;
 310         unsigned target_left = target_right + remainder;
 311 
 312         BUG_ON(target_left > max_entries);
 313         BUG_ON(target_right > max_entries);
 314 
 315         if (nr_left < nr_right) {
 316                 s = nr_left - target_left;
 317 
 318                 if (s < 0 && nr_center < -s) {
 319                         
 320                         shift(left, center, -nr_center);
 321                         s += nr_center;
 322                         shift(left, right, s);
 323                         nr_right += s;
 324                 } else
 325                         shift(left, center, s);
 326 
 327                 shift(center, right, target_right - nr_right);
 328 
 329         } else {
 330                 s = target_right - nr_right;
 331                 if (s > 0 && nr_center < s) {
 332                         
 333                         shift(center, right, nr_center);
 334                         s -= nr_center;
 335                         shift(left, right, s);
 336                         nr_left -= s;
 337                 } else
 338                         shift(center, right, s);
 339 
 340                 shift(left, center, nr_left - target_left);
 341         }
 342 
 343         *key_ptr(parent, c->index) = center->keys[0];
 344         *key_ptr(parent, r->index) = right->keys[0];
 345 }
 346 
 347 static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
 348                          struct child *l, struct child *c, struct child *r)
 349 {
 350         struct btree_node *left = l->n;
 351         struct btree_node *center = c->n;
 352         struct btree_node *right = r->n;
 353 
 354         uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
 355         uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
 356         uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
 357 
 358         unsigned threshold = merge_threshold(left) * 4 + 1;
 359 
 360         BUG_ON(left->header.max_entries != center->header.max_entries);
 361         BUG_ON(center->header.max_entries != right->header.max_entries);
 362 
 363         if ((nr_left + nr_center + nr_right) < threshold)
 364                 delete_center_node(info, parent, l, c, r, left, center, right,
 365                                    nr_left, nr_center, nr_right);
 366         else
 367                 redistribute3(info, parent, l, c, r, left, center, right,
 368                               nr_left, nr_center, nr_right);
 369 }
 370 
 371 static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
 372                       struct dm_btree_value_type *vt, unsigned left_index)
 373 {
 374         int r;
 375         struct btree_node *parent = dm_block_data(shadow_current(s));
 376         struct child left, center, right;
 377 
 378         
 379 
 380 
 381         r = init_child(info, vt, parent, left_index, &left);
 382         if (r)
 383                 return r;
 384 
 385         r = init_child(info, vt, parent, left_index + 1, ¢er);
 386         if (r) {
 387                 exit_child(info, &left);
 388                 return r;
 389         }
 390 
 391         r = init_child(info, vt, parent, left_index + 2, &right);
 392         if (r) {
 393                 exit_child(info, &left);
 394                 exit_child(info, ¢er);
 395                 return r;
 396         }
 397 
 398         __rebalance3(info, parent, &left, ¢er, &right);
 399 
 400         exit_child(info, &left);
 401         exit_child(info, ¢er);
 402         exit_child(info, &right);
 403 
 404         return 0;
 405 }
 406 
 407 static int rebalance_children(struct shadow_spine *s,
 408                               struct dm_btree_info *info,
 409                               struct dm_btree_value_type *vt, uint64_t key)
 410 {
 411         int i, r, has_left_sibling, has_right_sibling;
 412         struct btree_node *n;
 413 
 414         n = dm_block_data(shadow_current(s));
 415 
 416         if (le32_to_cpu(n->header.nr_entries) == 1) {
 417                 struct dm_block *child;
 418                 dm_block_t b = value64(n, 0);
 419 
 420                 r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child);
 421                 if (r)
 422                         return r;
 423 
 424                 memcpy(n, dm_block_data(child),
 425                        dm_bm_block_size(dm_tm_get_bm(info->tm)));
 426                 dm_tm_unlock(info->tm, child);
 427 
 428                 dm_tm_dec(info->tm, dm_block_location(child));
 429                 return 0;
 430         }
 431 
 432         i = lower_bound(n, key);
 433         if (i < 0)
 434                 return -ENODATA;
 435 
 436         has_left_sibling = i > 0;
 437         has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);
 438 
 439         if (!has_left_sibling)
 440                 r = rebalance2(s, info, vt, i);
 441 
 442         else if (!has_right_sibling)
 443                 r = rebalance2(s, info, vt, i - 1);
 444 
 445         else
 446                 r = rebalance3(s, info, vt, i - 1);
 447 
 448         return r;
 449 }
 450 
 451 static int do_leaf(struct btree_node *n, uint64_t key, unsigned *index)
 452 {
 453         int i = lower_bound(n, key);
 454 
 455         if ((i < 0) ||
 456             (i >= le32_to_cpu(n->header.nr_entries)) ||
 457             (le64_to_cpu(n->keys[i]) != key))
 458                 return -ENODATA;
 459 
 460         *index = i;
 461 
 462         return 0;
 463 }
 464 
 465 
 466 
 467 
 468 
 469 static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
 470                       struct dm_btree_value_type *vt, dm_block_t root,
 471                       uint64_t key, unsigned *index)
 472 {
 473         int i = *index, r;
 474         struct btree_node *n;
 475 
 476         for (;;) {
 477                 r = shadow_step(s, root, vt);
 478                 if (r < 0)
 479                         break;
 480 
 481                 
 482 
 483 
 484 
 485 
 486                 if (shadow_has_parent(s)) {
 487                         __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
 488                         memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
 489                                &location, sizeof(__le64));
 490                 }
 491 
 492                 n = dm_block_data(shadow_current(s));
 493 
 494                 if (le32_to_cpu(n->header.flags) & LEAF_NODE)
 495                         return do_leaf(n, key, index);
 496 
 497                 r = rebalance_children(s, info, vt, key);
 498                 if (r)
 499                         break;
 500 
 501                 n = dm_block_data(shadow_current(s));
 502                 if (le32_to_cpu(n->header.flags) & LEAF_NODE)
 503                         return do_leaf(n, key, index);
 504 
 505                 i = lower_bound(n, key);
 506 
 507                 
 508 
 509 
 510 
 511 
 512                 root = value64(n, i);
 513         }
 514 
 515         return r;
 516 }
 517 
 518 int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
 519                     uint64_t *keys, dm_block_t *new_root)
 520 {
 521         unsigned level, last_level = info->levels - 1;
 522         int index = 0, r = 0;
 523         struct shadow_spine spine;
 524         struct btree_node *n;
 525         struct dm_btree_value_type le64_vt;
 526 
 527         init_le64_type(info->tm, &le64_vt);
 528         init_shadow_spine(&spine, info);
 529         for (level = 0; level < info->levels; level++) {
 530                 r = remove_raw(&spine, info,
 531                                (level == last_level ?
 532                                 &info->value_type : &le64_vt),
 533                                root, keys[level], (unsigned *)&index);
 534                 if (r < 0)
 535                         break;
 536 
 537                 n = dm_block_data(shadow_current(&spine));
 538                 if (level != last_level) {
 539                         root = value64(n, index);
 540                         continue;
 541                 }
 542 
 543                 BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries));
 544 
 545                 if (info->value_type.dec)
 546                         info->value_type.dec(info->value_type.context,
 547                                              value_ptr(n, index));
 548 
 549                 delete_at(n, index);
 550         }
 551 
 552         *new_root = shadow_root(&spine);
 553         exit_shadow_spine(&spine);
 554 
 555         return r;
 556 }
 557 EXPORT_SYMBOL_GPL(dm_btree_remove);
 558 
 559 
 560 
 561 static int remove_nearest(struct shadow_spine *s, struct dm_btree_info *info,
 562                           struct dm_btree_value_type *vt, dm_block_t root,
 563                           uint64_t key, int *index)
 564 {
 565         int i = *index, r;
 566         struct btree_node *n;
 567 
 568         for (;;) {
 569                 r = shadow_step(s, root, vt);
 570                 if (r < 0)
 571                         break;
 572 
 573                 
 574 
 575 
 576 
 577 
 578                 if (shadow_has_parent(s)) {
 579                         __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
 580                         memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
 581                                &location, sizeof(__le64));
 582                 }
 583 
 584                 n = dm_block_data(shadow_current(s));
 585 
 586                 if (le32_to_cpu(n->header.flags) & LEAF_NODE) {
 587                         *index = lower_bound(n, key);
 588                         return 0;
 589                 }
 590 
 591                 r = rebalance_children(s, info, vt, key);
 592                 if (r)
 593                         break;
 594 
 595                 n = dm_block_data(shadow_current(s));
 596                 if (le32_to_cpu(n->header.flags) & LEAF_NODE) {
 597                         *index = lower_bound(n, key);
 598                         return 0;
 599                 }
 600 
 601                 i = lower_bound(n, key);
 602 
 603                 
 604 
 605 
 606 
 607 
 608                 root = value64(n, i);
 609         }
 610 
 611         return r;
 612 }
 613 
 614 static int remove_one(struct dm_btree_info *info, dm_block_t root,
 615                       uint64_t *keys, uint64_t end_key,
 616                       dm_block_t *new_root, unsigned *nr_removed)
 617 {
 618         unsigned level, last_level = info->levels - 1;
 619         int index = 0, r = 0;
 620         struct shadow_spine spine;
 621         struct btree_node *n;
 622         struct dm_btree_value_type le64_vt;
 623         uint64_t k;
 624 
 625         init_le64_type(info->tm, &le64_vt);
 626         init_shadow_spine(&spine, info);
 627         for (level = 0; level < last_level; level++) {
 628                 r = remove_raw(&spine, info, &le64_vt,
 629                                root, keys[level], (unsigned *) &index);
 630                 if (r < 0)
 631                         goto out;
 632 
 633                 n = dm_block_data(shadow_current(&spine));
 634                 root = value64(n, index);
 635         }
 636 
 637         r = remove_nearest(&spine, info, &info->value_type,
 638                            root, keys[last_level], &index);
 639         if (r < 0)
 640                 goto out;
 641 
 642         n = dm_block_data(shadow_current(&spine));
 643 
 644         if (index < 0)
 645                 index = 0;
 646 
 647         if (index >= le32_to_cpu(n->header.nr_entries)) {
 648                 r = -ENODATA;
 649                 goto out;
 650         }
 651 
 652         k = le64_to_cpu(n->keys[index]);
 653         if (k >= keys[last_level] && k < end_key) {
 654                 if (info->value_type.dec)
 655                         info->value_type.dec(info->value_type.context,
 656                                              value_ptr(n, index));
 657 
 658                 delete_at(n, index);
 659                 keys[last_level] = k + 1ull;
 660 
 661         } else
 662                 r = -ENODATA;
 663 
 664 out:
 665         *new_root = shadow_root(&spine);
 666         exit_shadow_spine(&spine);
 667 
 668         return r;
 669 }
 670 
 671 int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root,
 672                            uint64_t *first_key, uint64_t end_key,
 673                            dm_block_t *new_root, unsigned *nr_removed)
 674 {
 675         int r;
 676 
 677         *nr_removed = 0;
 678         do {
 679                 r = remove_one(info, root, first_key, end_key, &root, nr_removed);
 680                 if (!r)
 681                         (*nr_removed)++;
 682         } while (!r);
 683 
 684         *new_root = root;
 685         return r == -ENODATA ? 0 : r;
 686 }
 687 EXPORT_SYMBOL_GPL(dm_btree_remove_leaves);