root/fs/btrfs/send.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. inconsistent_snapshot_error
  2. need_send_hole
  3. fs_path_reset
  4. fs_path_alloc
  5. fs_path_alloc_reversed
  6. fs_path_free
  7. fs_path_len
  8. fs_path_ensure_buf
  9. fs_path_prepare_for_add
  10. fs_path_add
  11. fs_path_add_path
  12. fs_path_add_from_extent_buffer
  13. fs_path_copy
  14. fs_path_unreverse
  15. alloc_path_for_send
  16. write_buf
  17. tlv_put
  18. TLV_PUT_DEFINE_INT
  19. tlv_put_uuid
  20. tlv_put_btrfs_timespec
  21. send_header
  22. begin_cmd
  23. send_cmd
  24. send_rename
  25. send_link
  26. send_unlink
  27. send_rmdir
  28. __get_inode_info
  29. get_inode_info
  30. iterate_inode_ref
  31. iterate_dir_item
  32. __copy_first_ref
  33. get_inode_path
  34. __clone_root_cmp_bsearch
  35. __clone_root_cmp_sort
  36. __iterate_backrefs
  37. find_extent_clone
  38. read_symlink
  39. gen_unique_name
  40. get_cur_inode_state
  41. is_inode_existent
  42. lookup_dir_item_inode
  43. get_first_ref
  44. is_first_ref
  45. will_overwrite_ref
  46. did_overwrite_ref
  47. did_overwrite_first_ref
  48. name_cache_insert
  49. name_cache_delete
  50. name_cache_search
  51. name_cache_used
  52. name_cache_clean_unused
  53. name_cache_free
  54. __get_cur_name_and_parent
  55. get_cur_path
  56. send_subvol_begin
  57. send_truncate
  58. send_chmod
  59. send_chown
  60. send_utimes
  61. send_create_inode
  62. did_create_dir
  63. send_create_inode_if_needed
  64. set_ref_path
  65. __record_ref
  66. dup_ref
  67. __free_recorded_refs
  68. free_recorded_refs
  69. orphanize_inode
  70. add_orphan_dir_info
  71. get_orphan_dir_info
  72. is_waiting_for_rm
  73. free_orphan_dir_info
  74. can_rmdir
  75. is_waiting_for_move
  76. add_waiting_dir_move
  77. get_waiting_dir_move
  78. free_waiting_dir_move
  79. add_pending_dir_move
  80. get_pending_dir_moves
  81. path_loop
  82. apply_dir_move
  83. free_pending_move
  84. tail_append_pending_moves
  85. apply_children_dir_moves
  86. wait_for_dest_dir_move
  87. check_ino_in_path
  88. is_ancestor
  89. wait_for_parent_move
  90. update_ref_path
  91. process_recorded_refs
  92. record_ref
  93. __record_new_ref
  94. __record_deleted_ref
  95. record_new_ref
  96. record_deleted_ref
  97. __find_iref
  98. find_iref
  99. __record_changed_new_ref
  100. __record_changed_deleted_ref
  101. record_changed_ref
  102. process_all_refs
  103. send_set_xattr
  104. send_remove_xattr
  105. __process_new_xattr
  106. __process_deleted_xattr
  107. process_new_xattr
  108. process_deleted_xattr
  109. __find_xattr
  110. find_xattr
  111. __process_changed_new_xattr
  112. __process_changed_deleted_xattr
  113. process_changed_xattr
  114. process_all_new_xattrs
  115. fill_read_buf
  116. send_write
  117. send_clone
  118. send_update_extent
  119. send_hole
  120. send_extent_data
  121. clone_range
  122. send_write_or_clone
  123. is_extent_unchanged
  124. get_last_extent
  125. range_is_hole_in_parent
  126. maybe_send_hole
  127. process_extent
  128. process_all_extents
  129. process_recorded_refs_if_needed
  130. finish_inode_if_needed
  131. record_parent_ref
  132. btrfs_unlink_all_paths
  133. changed_inode
  134. changed_ref
  135. changed_xattr
  136. changed_extent
  137. dir_changed
  138. compare_refs
  139. changed_cb
  140. full_send_tree
  141. tree_move_down
  142. tree_move_next_or_upnext
  143. tree_advance
  144. tree_compare_item
  145. btrfs_compare_trees
  146. send_subvol
  147. ensure_commit_roots_uptodate
  148. flush_delalloc_roots
  149. btrfs_root_dec_send_in_progress
  150. dedupe_in_progress_warn
  151. btrfs_ioctl_send

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (C) 2012 Alexander Block.  All rights reserved.
   4  */
   5 
   6 #include <linux/bsearch.h>
   7 #include <linux/fs.h>
   8 #include <linux/file.h>
   9 #include <linux/sort.h>
  10 #include <linux/mount.h>
  11 #include <linux/xattr.h>
  12 #include <linux/posix_acl_xattr.h>
  13 #include <linux/radix-tree.h>
  14 #include <linux/vmalloc.h>
  15 #include <linux/string.h>
  16 #include <linux/compat.h>
  17 #include <linux/crc32c.h>
  18 
  19 #include "send.h"
  20 #include "backref.h"
  21 #include "locking.h"
  22 #include "disk-io.h"
  23 #include "btrfs_inode.h"
  24 #include "transaction.h"
  25 #include "compression.h"
  26 
  27 /*
  28  * Maximum number of references an extent can have in order for us to attempt to
  29  * issue clone operations instead of write operations. This currently exists to
  30  * avoid hitting limitations of the backreference walking code (taking a lot of
  31  * time and using too much memory for extents with large number of references).
  32  */
  33 #define SEND_MAX_EXTENT_REFS    64
  34 
  35 /*
  36  * A fs_path is a helper to dynamically build path names with unknown size.
  37  * It reallocates the internal buffer on demand.
  38  * It allows fast adding of path elements on the right side (normal path) and
  39  * fast adding to the left side (reversed path). A reversed path can also be
  40  * unreversed if needed.
  41  */
struct fs_path {
	union {
		struct {
			char *start;	/* first byte of the path string */
			char *end;	/* points at the terminating NUL */

			char *buf;	/* current storage: inline_buf or heap */
			unsigned short buf_len:15;	/* usable size of buf */
			unsigned short reversed:1;	/* path built right-to-left */
			char inline_buf[];
		};
		/*
		 * Average path length does not exceed 200 bytes, we'll have
		 * better packing in the slab and higher chance to satisfy
		 * an allocation later during send.
		 */
		char pad[256];
	};
};
/* Bytes available in the inline buffer before a heap allocation is needed. */
#define FS_PATH_INLINE_SIZE \
	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
  63 
  64 
/* reused for each extent */
struct clone_root {
	struct btrfs_root *root;	/* candidate root to clone from */
	u64 ino;			/* inode in that root holding the extent */
	u64 offset;			/* file offset of the extent in ino */

	u64 found_refs;			/* how many references were found here */
};
  73 
#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)

/*
 * State of a single send operation; one instance per btrfs_ioctl_send() call.
 */
struct send_ctx {
	struct file *send_filp;		/* file the send stream is written to */
	loff_t send_off;		/* current write offset into send_filp */
	char *send_buf;			/* buffer the current command is built in */
	u32 send_size;			/* bytes of send_buf currently used */
	u32 send_max_size;		/* capacity of send_buf */
	u64 total_send_size;		/* total bytes sent (statistics) */
	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];	/* per-command byte stats */
	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */

	struct btrfs_root *send_root;	/* the snapshot being sent */
	struct btrfs_root *parent_root;	/* base snapshot for incremental sends */
	struct clone_root *clone_roots;
	int clone_roots_cnt;

	/* current state of the compare_tree call */
	struct btrfs_path *left_path;
	struct btrfs_path *right_path;
	struct btrfs_key *cmp_key;

	/*
	 * infos of the currently processed inode. In case of deleted inodes,
	 * these are the values from the deleted inode.
	 */
	u64 cur_ino;
	u64 cur_inode_gen;
	int cur_inode_new;
	int cur_inode_new_gen;
	int cur_inode_deleted;
	u64 cur_inode_size;
	u64 cur_inode_mode;
	u64 cur_inode_rdev;
	u64 cur_inode_last_extent;
	u64 cur_inode_next_write_offset;
	bool ignore_cur_inode;

	u64 send_progress;

	struct list_head new_refs;
	struct list_head deleted_refs;

	/* Cache of previously resolved names, keyed by inode number. */
	struct radix_tree_root name_cache;
	struct list_head name_cache_list;
	int name_cache_size;

	struct file_ra_state ra;

	char *read_buf;

	/*
	 * We process inodes by their increasing order, so if before an
	 * incremental send we reverse the parent/child relationship of
	 * directories such that a directory with a lower inode number was
	 * the parent of a directory with a higher inode number, and the one
	 * becoming the new parent got renamed too, we can't rename/move the
	 * directory with lower inode number when we finish processing it - we
	 * must process the directory with higher inode number first, then
	 * rename/move it and then rename/move the directory with lower inode
	 * number. Example follows.
	 *
	 * Tree state when the first send was performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |
	 *         |
	 *         |-- c           (ino 259)
	 *         |   |-- d       (ino 260)
	 *         |
	 *         |-- c2          (ino 261)
	 *
	 * Tree state when the second (incremental) send is performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |-- c2          (ino 261)
	 *             |-- d2      (ino 260)
	 *                 |-- cc  (ino 259)
	 *
	 * The sequence of steps that lead to the second state was:
	 *
	 * mv /a/b/c/d /a/b/c2/d2
	 * mv /a/b/c /a/b/c2/d2/cc
	 *
	 * "c" has lower inode number, but we can't move it (2nd mv operation)
	 * before we move "d", which has higher inode number.
	 *
	 * So we just memorize which move/rename operations must be performed
	 * later when their respective parent is processed and moved/renamed.
	 */

	/* Indexed by parent directory inode number. */
	struct rb_root pending_dir_moves;

	/*
	 * Reverse index, indexed by the inode number of a directory that
	 * is waiting for the move/rename of its immediate parent before its
	 * own move/rename can be performed.
	 */
	struct rb_root waiting_dir_moves;

	/*
	 * A directory that is going to be rm'ed might have a child directory
	 * which is in the pending directory moves index above. In this case,
	 * the directory can only be removed after the move/rename of its child
	 * is performed. Example:
	 *
	 * Parent snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- c/           (ino 259)
	 *         |   |-- x/       (ino 260)
	 *         |
	 *         |-- y/           (ino 261)
	 *
	 * Send snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- YY/          (ino 261)
	 *              |-- x/      (ino 260)
	 *
	 * Sequence of steps that lead to the send snapshot:
	 * rm -f /a/b/c/foo.txt
	 * mv /a/b/y /a/b/YY
	 * mv /a/b/c/x /a/b/YY
	 * rmdir /a/b/c
	 *
	 * When the child is processed, its move/rename is delayed until its
	 * parent is processed (as explained above), but all other operations
	 * like update utimes, chown, chgrp, etc, are performed and the paths
	 * that it uses for those operations must use the orphanized name of
	 * its parent (the directory we're going to rm later), so we need to
	 * memorize that name.
	 *
	 * Indexed by the inode number of the directory to be deleted.
	 */
	struct rb_root orphan_dirs;
};
 221 
/*
 * A directory move/rename that must be delayed until its new parent directory
 * has been processed (see the long comment in struct send_ctx).
 */
struct pending_dir_move {
	struct rb_node node;		/* linked into sctx->pending_dir_moves */
	struct list_head list;
	u64 parent_ino;			/* directory we are waiting for */
	u64 ino;			/* inode to be moved/renamed */
	u64 gen;
	struct list_head update_refs;
};
 230 
/* Reverse-index node for a directory whose move is currently delayed. */
struct waiting_dir_move {
	struct rb_node node;		/* linked into sctx->waiting_dir_moves */
	u64 ino;
	/*
	 * There might be some directory that could not be removed because it
	 * was waiting for this directory inode to be moved first. Therefore
	 * after this directory is moved, we can try to rmdir the ino rmdir_ino.
	 */
	u64 rmdir_ino;
	bool orphanized;
};
 242 
/*
 * Bookkeeping for a directory that will be deleted but must keep its
 * (orphanized) name around until pending child moves complete.  Indexed by
 * inode number in sctx->orphan_dirs.
 */
struct orphan_dir_info {
	struct rb_node node;
	u64 ino;
	u64 gen;
	u64 last_dir_index_offset;
};
 249 
/*
 * Cached result of a name/parent lookup for an (ino, gen) pair, stored in
 * sctx->name_cache.
 */
struct name_cache_entry {
	struct list_head list;		/* LRU list (sctx->name_cache_list) */
	/*
	 * radix_tree has only 32bit entries but we need to handle 64bit inums.
	 * We use the lower 32bit of the 64bit inum to store it in the tree. If
	 * more than one inum would fall into the same entry, we use radix_list
	 * to store the additional entries. radix_list is also used to store
	 * entries where two entries have the same inum but different
	 * generations.
	 */
	struct list_head radix_list;
	u64 ino;
	u64 gen;
	u64 parent_ino;
	u64 parent_gen;
	int ret;		/* cached status of the original lookup */
	int need_later_update;
	int name_len;		/* length of name[], not NUL terminated */
	char name[];
};
 270 
#define ADVANCE							1
#define ADVANCE_ONLY_NEXT					-1

/* Outcome of comparing one item between the parent and the send tree. */
enum btrfs_compare_tree_result {
	BTRFS_COMPARE_TREE_NEW,
	BTRFS_COMPARE_TREE_DELETED,
	BTRFS_COMPARE_TREE_CHANGED,
	BTRFS_COMPARE_TREE_SAME,
};
/* Callback invoked for every compared item; @ctx is the caller's cookie. */
typedef int (*btrfs_changed_cb_t)(struct btrfs_path *left_path,
				  struct btrfs_path *right_path,
				  struct btrfs_key *key,
				  enum btrfs_compare_tree_result result,
				  void *ctx);
 285 
 286 __cold
 287 static void inconsistent_snapshot_error(struct send_ctx *sctx,
 288                                         enum btrfs_compare_tree_result result,
 289                                         const char *what)
 290 {
 291         const char *result_string;
 292 
 293         switch (result) {
 294         case BTRFS_COMPARE_TREE_NEW:
 295                 result_string = "new";
 296                 break;
 297         case BTRFS_COMPARE_TREE_DELETED:
 298                 result_string = "deleted";
 299                 break;
 300         case BTRFS_COMPARE_TREE_CHANGED:
 301                 result_string = "updated";
 302                 break;
 303         case BTRFS_COMPARE_TREE_SAME:
 304                 ASSERT(0);
 305                 result_string = "unchanged";
 306                 break;
 307         default:
 308                 ASSERT(0);
 309                 result_string = "unexpected";
 310         }
 311 
 312         btrfs_err(sctx->send_root->fs_info,
 313                   "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
 314                   result_string, what, sctx->cmp_key->objectid,
 315                   sctx->send_root->root_key.objectid,
 316                   (sctx->parent_root ?
 317                    sctx->parent_root->root_key.objectid : 0));
 318 }
 319 
 320 static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
 321 
 322 static struct waiting_dir_move *
 323 get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
 324 
 325 static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);
 326 
 327 static int need_send_hole(struct send_ctx *sctx)
 328 {
 329         return (sctx->parent_root && !sctx->cur_inode_new &&
 330                 !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
 331                 S_ISREG(sctx->cur_inode_mode));
 332 }
 333 
 334 static void fs_path_reset(struct fs_path *p)
 335 {
 336         if (p->reversed) {
 337                 p->start = p->buf + p->buf_len - 1;
 338                 p->end = p->start;
 339                 *p->start = 0;
 340         } else {
 341                 p->start = p->buf;
 342                 p->end = p->start;
 343                 *p->start = 0;
 344         }
 345 }
 346 
 347 static struct fs_path *fs_path_alloc(void)
 348 {
 349         struct fs_path *p;
 350 
 351         p = kmalloc(sizeof(*p), GFP_KERNEL);
 352         if (!p)
 353                 return NULL;
 354         p->reversed = 0;
 355         p->buf = p->inline_buf;
 356         p->buf_len = FS_PATH_INLINE_SIZE;
 357         fs_path_reset(p);
 358         return p;
 359 }
 360 
 361 static struct fs_path *fs_path_alloc_reversed(void)
 362 {
 363         struct fs_path *p;
 364 
 365         p = fs_path_alloc();
 366         if (!p)
 367                 return NULL;
 368         p->reversed = 1;
 369         fs_path_reset(p);
 370         return p;
 371 }
 372 
 373 static void fs_path_free(struct fs_path *p)
 374 {
 375         if (!p)
 376                 return;
 377         if (p->buf != p->inline_buf)
 378                 kfree(p->buf);
 379         kfree(p);
 380 }
 381 
 382 static int fs_path_len(struct fs_path *p)
 383 {
 384         return p->end - p->start;
 385 }
 386 
/*
 * Make sure the path buffer can hold at least @len bytes plus a terminating
 * NUL.  The first growth moves from the inline buffer to a heap allocation;
 * later growths use krealloc().  For reversed paths the existing content is
 * relocated to the end of the new buffer.
 * Returns 0 on success, -ENOMEM on allocation failure or if the requested
 * length exceeds PATH_MAX.
 */
static int fs_path_ensure_buf(struct fs_path *p, int len)
{
	char *tmp_buf;
	int path_len;
	int old_buf_len;

	len++;	/* room for the terminating NUL */

	if (p->buf_len >= len)
		return 0;

	if (len > PATH_MAX) {
		WARN_ON(1);
		return -ENOMEM;
	}

	path_len = p->end - p->start;
	old_buf_len = p->buf_len;

	/*
	 * First time the inline_buf does not suffice
	 */
	if (p->buf == p->inline_buf) {
		tmp_buf = kmalloc(len, GFP_KERNEL);
		if (tmp_buf)
			memcpy(tmp_buf, p->buf, old_buf_len);
	} else {
		tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
	}
	if (!tmp_buf)
		return -ENOMEM;
	p->buf = tmp_buf;
	/*
	 * The real size of the buffer is bigger, this will let the fast path
	 * happen most of the time
	 */
	p->buf_len = ksize(p->buf);

	if (p->reversed) {
		/* Move the old content (incl. NUL) to the end of the new buffer. */
		tmp_buf = p->buf + old_buf_len - path_len - 1;
		p->end = p->buf + p->buf_len - 1;
		p->start = p->end - path_len;
		memmove(p->start, tmp_buf, path_len + 1);
	} else {
		p->start = p->buf;
		p->end = p->start + path_len;
	}
	return 0;
}
 436 
/*
 * Reserve space for @name_len more bytes in the path and return, via
 * @prepared, the position where the caller must copy the new component.
 * A '/' separator is inserted automatically when the path is non-empty.
 * Reversed paths get the component prepended, normal paths appended.
 * Returns 0 on success, negative errno on allocation failure.
 */
static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
				   char **prepared)
{
	int ret;
	int new_len;

	new_len = p->end - p->start + name_len;
	if (p->start != p->end)
		new_len++;	/* plus one byte for the '/' separator */
	ret = fs_path_ensure_buf(p, new_len);
	if (ret < 0)
		goto out;

	if (p->reversed) {
		if (p->start != p->end)
			*--p->start = '/';
		p->start -= name_len;
		*prepared = p->start;
	} else {
		if (p->start != p->end)
			*p->end++ = '/';
		*prepared = p->end;
		p->end += name_len;
		*p->end = 0;
	}

out:
	return ret;
}
 466 
 467 static int fs_path_add(struct fs_path *p, const char *name, int name_len)
 468 {
 469         int ret;
 470         char *prepared;
 471 
 472         ret = fs_path_prepare_for_add(p, name_len, &prepared);
 473         if (ret < 0)
 474                 goto out;
 475         memcpy(prepared, name, name_len);
 476 
 477 out:
 478         return ret;
 479 }
 480 
 481 static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
 482 {
 483         int ret;
 484         char *prepared;
 485 
 486         ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
 487         if (ret < 0)
 488                 goto out;
 489         memcpy(prepared, p2->start, p2->end - p2->start);
 490 
 491 out:
 492         return ret;
 493 }
 494 
 495 static int fs_path_add_from_extent_buffer(struct fs_path *p,
 496                                           struct extent_buffer *eb,
 497                                           unsigned long off, int len)
 498 {
 499         int ret;
 500         char *prepared;
 501 
 502         ret = fs_path_prepare_for_add(p, len, &prepared);
 503         if (ret < 0)
 504                 goto out;
 505 
 506         read_extent_buffer(eb, prepared, off, len);
 507 
 508 out:
 509         return ret;
 510 }
 511 
 512 static int fs_path_copy(struct fs_path *p, struct fs_path *from)
 513 {
 514         int ret;
 515 
 516         p->reversed = from->reversed;
 517         fs_path_reset(p);
 518 
 519         ret = fs_path_add_path(p, from);
 520 
 521         return ret;
 522 }
 523 
 524 
 525 static void fs_path_unreverse(struct fs_path *p)
 526 {
 527         char *tmp;
 528         int len;
 529 
 530         if (!p->reversed)
 531                 return;
 532 
 533         tmp = p->start;
 534         len = p->end - p->start;
 535         p->start = p->buf;
 536         p->end = p->start + len;
 537         memmove(p->start, tmp, len + 1);
 538         p->reversed = 0;
 539 }
 540 
 541 static struct btrfs_path *alloc_path_for_send(void)
 542 {
 543         struct btrfs_path *path;
 544 
 545         path = btrfs_alloc_path();
 546         if (!path)
 547                 return NULL;
 548         path->search_commit_root = 1;
 549         path->skip_locking = 1;
 550         path->need_commit_sem = 1;
 551         return path;
 552 }
 553 
 554 static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
 555 {
 556         int ret;
 557         u32 pos = 0;
 558 
 559         while (pos < len) {
 560                 ret = kernel_write(filp, buf + pos, len - pos, off);
 561                 /* TODO handle that correctly */
 562                 /*if (ret == -ERESTARTSYS) {
 563                         continue;
 564                 }*/
 565                 if (ret < 0)
 566                         return ret;
 567                 if (ret == 0) {
 568                         return -EIO;
 569                 }
 570                 pos += ret;
 571         }
 572 
 573         return 0;
 574 }
 575 
 576 static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
 577 {
 578         struct btrfs_tlv_header *hdr;
 579         int total_len = sizeof(*hdr) + len;
 580         int left = sctx->send_max_size - sctx->send_size;
 581 
 582         if (unlikely(left < total_len))
 583                 return -EOVERFLOW;
 584 
 585         hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
 586         hdr->tlv_type = cpu_to_le16(attr);
 587         hdr->tlv_len = cpu_to_le16(len);
 588         memcpy(hdr + 1, data, len);
 589         sctx->send_size += total_len;
 590 
 591         return 0;
 592 }
 593 
/*
 * Generate a tlv_put_u<bits>() helper that converts the value to
 * little-endian before appending it to the send stream.
 */
#define TLV_PUT_DEFINE_INT(bits) \
	static int tlv_put_u##bits(struct send_ctx *sctx,		\
			u##bits attr, u##bits value)			\
	{								\
		__le##bits __tmp = cpu_to_le##bits(value);		\
		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));	\
	}

TLV_PUT_DEFINE_INT(64)
 603 
 604 static int tlv_put_string(struct send_ctx *sctx, u16 attr,
 605                           const char *str, int len)
 606 {
 607         if (len == -1)
 608                 len = strlen(str);
 609         return tlv_put(sctx, attr, str, len);
 610 }
 611 
 612 static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
 613                         const u8 *uuid)
 614 {
 615         return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
 616 }
 617 
 618 static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
 619                                   struct extent_buffer *eb,
 620                                   struct btrfs_timespec *ts)
 621 {
 622         struct btrfs_timespec bts;
 623         read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
 624         return tlv_put(sctx, attr, &bts, sizeof(bts));
 625 }
 626 
 627 
 628 #define TLV_PUT(sctx, attrtype, data, attrlen) \
 629         do { \
 630                 ret = tlv_put(sctx, attrtype, data, attrlen); \
 631                 if (ret < 0) \
 632                         goto tlv_put_failure; \
 633         } while (0)
 634 
 635 #define TLV_PUT_INT(sctx, attrtype, bits, value) \
 636         do { \
 637                 ret = tlv_put_u##bits(sctx, attrtype, value); \
 638                 if (ret < 0) \
 639                         goto tlv_put_failure; \
 640         } while (0)
 641 
 642 #define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
 643 #define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
 644 #define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
 645 #define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
 646 #define TLV_PUT_STRING(sctx, attrtype, str, len) \
 647         do { \
 648                 ret = tlv_put_string(sctx, attrtype, str, len); \
 649                 if (ret < 0) \
 650                         goto tlv_put_failure; \
 651         } while (0)
 652 #define TLV_PUT_PATH(sctx, attrtype, p) \
 653         do { \
 654                 ret = tlv_put_string(sctx, attrtype, p->start, \
 655                         p->end - p->start); \
 656                 if (ret < 0) \
 657                         goto tlv_put_failure; \
 658         } while(0)
 659 #define TLV_PUT_UUID(sctx, attrtype, uuid) \
 660         do { \
 661                 ret = tlv_put_uuid(sctx, attrtype, uuid); \
 662                 if (ret < 0) \
 663                         goto tlv_put_failure; \
 664         } while (0)
 665 #define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
 666         do { \
 667                 ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
 668                 if (ret < 0) \
 669                         goto tlv_put_failure; \
 670         } while (0)
 671 
 672 static int send_header(struct send_ctx *sctx)
 673 {
 674         struct btrfs_stream_header hdr;
 675 
 676         strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
 677         hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
 678 
 679         return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
 680                                         &sctx->send_off);
 681 }
 682 
 683 /*
 684  * For each command/item we want to send to userspace, we call this function.
 685  */
 686 static int begin_cmd(struct send_ctx *sctx, int cmd)
 687 {
 688         struct btrfs_cmd_header *hdr;
 689 
 690         if (WARN_ON(!sctx->send_buf))
 691                 return -EINVAL;
 692 
 693         BUG_ON(sctx->send_size);
 694 
 695         sctx->send_size += sizeof(*hdr);
 696         hdr = (struct btrfs_cmd_header *)sctx->send_buf;
 697         hdr->cmd = cpu_to_le16(cmd);
 698 
 699         return 0;
 700 }
 701 
 702 static int send_cmd(struct send_ctx *sctx)
 703 {
 704         int ret;
 705         struct btrfs_cmd_header *hdr;
 706         u32 crc;
 707 
 708         hdr = (struct btrfs_cmd_header *)sctx->send_buf;
 709         hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
 710         hdr->crc = 0;
 711 
 712         crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
 713         hdr->crc = cpu_to_le32(crc);
 714 
 715         ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
 716                                         &sctx->send_off);
 717 
 718         sctx->total_send_size += sctx->send_size;
 719         sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
 720         sctx->send_size = 0;
 721 
 722         return ret;
 723 }
 724 
 725 /*
 726  * Sends a move instruction to user space
 727  */
 728 static int send_rename(struct send_ctx *sctx,
 729                      struct fs_path *from, struct fs_path *to)
 730 {
 731         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
 732         int ret;
 733 
 734         btrfs_debug(fs_info, "send_rename %s -> %s", from->start, to->start);
 735 
 736         ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
 737         if (ret < 0)
 738                 goto out;
 739 
 740         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
 741         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
 742 
 743         ret = send_cmd(sctx);
 744 
 745 tlv_put_failure:
 746 out:
 747         return ret;
 748 }
 749 
 750 /*
 751  * Sends a link instruction to user space
 752  */
 753 static int send_link(struct send_ctx *sctx,
 754                      struct fs_path *path, struct fs_path *lnk)
 755 {
 756         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
 757         int ret;
 758 
 759         btrfs_debug(fs_info, "send_link %s -> %s", path->start, lnk->start);
 760 
 761         ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
 762         if (ret < 0)
 763                 goto out;
 764 
 765         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
 766         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
 767 
 768         ret = send_cmd(sctx);
 769 
 770 tlv_put_failure:
 771 out:
 772         return ret;
 773 }
 774 
 775 /*
 776  * Sends an unlink instruction to user space
 777  */
 778 static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
 779 {
 780         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
 781         int ret;
 782 
 783         btrfs_debug(fs_info, "send_unlink %s", path->start);
 784 
 785         ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
 786         if (ret < 0)
 787                 goto out;
 788 
 789         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
 790 
 791         ret = send_cmd(sctx);
 792 
 793 tlv_put_failure:
 794 out:
 795         return ret;
 796 }
 797 
 798 /*
 799  * Sends a rmdir instruction to user space
 800  */
 801 static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
 802 {
 803         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
 804         int ret;
 805 
 806         btrfs_debug(fs_info, "send_rmdir %s", path->start);
 807 
 808         ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
 809         if (ret < 0)
 810                 goto out;
 811 
 812         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
 813 
 814         ret = send_cmd(sctx);
 815 
 816 tlv_put_failure:
 817 out:
 818         return ret;
 819 }
 820 
 821 /*
 822  * Helper function to retrieve some fields from an inode item.
 823  */
 824 static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
 825                           u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
 826                           u64 *gid, u64 *rdev)
 827 {
 828         int ret;
 829         struct btrfs_inode_item *ii;
 830         struct btrfs_key key;
 831 
 832         key.objectid = ino;
 833         key.type = BTRFS_INODE_ITEM_KEY;
 834         key.offset = 0;
 835         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 836         if (ret) {
 837                 if (ret > 0)
 838                         ret = -ENOENT;
 839                 return ret;
 840         }
 841 
 842         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
 843                         struct btrfs_inode_item);
 844         if (size)
 845                 *size = btrfs_inode_size(path->nodes[0], ii);
 846         if (gen)
 847                 *gen = btrfs_inode_generation(path->nodes[0], ii);
 848         if (mode)
 849                 *mode = btrfs_inode_mode(path->nodes[0], ii);
 850         if (uid)
 851                 *uid = btrfs_inode_uid(path->nodes[0], ii);
 852         if (gid)
 853                 *gid = btrfs_inode_gid(path->nodes[0], ii);
 854         if (rdev)
 855                 *rdev = btrfs_inode_rdev(path->nodes[0], ii);
 856 
 857         return ret;
 858 }
 859 
 860 static int get_inode_info(struct btrfs_root *root,
 861                           u64 ino, u64 *size, u64 *gen,
 862                           u64 *mode, u64 *uid, u64 *gid,
 863                           u64 *rdev)
 864 {
 865         struct btrfs_path *path;
 866         int ret;
 867 
 868         path = alloc_path_for_send();
 869         if (!path)
 870                 return -ENOMEM;
 871         ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
 872                                rdev);
 873         btrfs_free_path(path);
 874         return ret;
 875 }
 876 
/*
 * Callback type for iterate_inode_ref(): called once per name found in an
 * INODE_REF/INODE_EXTREF item, with the ref's ordinal (num), parent dir
 * inode (dir), dir index and the (possibly fully resolved) path.
 * A non-zero return stops the iteration (negative errno, or 1 to just stop).
 */
typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
                                   struct fs_path *p,
                                   void *ctx);
 880 
 881 /*
 882  * Helper function to iterate the entries in ONE btrfs_inode_ref or
 883  * btrfs_inode_extref.
 884  * The iterate callback may return a non zero value to stop iteration. This can
 885  * be a negative value for error codes or 1 to simply stop it.
 886  *
 887  * path must point to the INODE_REF or INODE_EXTREF when called.
 888  */
 889 static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
 890                              struct btrfs_key *found_key, int resolve,
 891                              iterate_inode_ref_t iterate, void *ctx)
 892 {
 893         struct extent_buffer *eb = path->nodes[0];
 894         struct btrfs_item *item;
 895         struct btrfs_inode_ref *iref;
 896         struct btrfs_inode_extref *extref;
 897         struct btrfs_path *tmp_path;
 898         struct fs_path *p;
 899         u32 cur = 0;
 900         u32 total;
 901         int slot = path->slots[0];
 902         u32 name_len;
 903         char *start;
 904         int ret = 0;
 905         int num = 0;
 906         int index;
 907         u64 dir;
 908         unsigned long name_off;
 909         unsigned long elem_size;
 910         unsigned long ptr;
 911 
 912         p = fs_path_alloc_reversed();
 913         if (!p)
 914                 return -ENOMEM;
 915 
 916         tmp_path = alloc_path_for_send();
 917         if (!tmp_path) {
 918                 fs_path_free(p);
 919                 return -ENOMEM;
 920         }
 921 
 922 
 923         if (found_key->type == BTRFS_INODE_REF_KEY) {
 924                 ptr = (unsigned long)btrfs_item_ptr(eb, slot,
 925                                                     struct btrfs_inode_ref);
 926                 item = btrfs_item_nr(slot);
 927                 total = btrfs_item_size(eb, item);
 928                 elem_size = sizeof(*iref);
 929         } else {
 930                 ptr = btrfs_item_ptr_offset(eb, slot);
 931                 total = btrfs_item_size_nr(eb, slot);
 932                 elem_size = sizeof(*extref);
 933         }
 934 
 935         while (cur < total) {
 936                 fs_path_reset(p);
 937 
 938                 if (found_key->type == BTRFS_INODE_REF_KEY) {
 939                         iref = (struct btrfs_inode_ref *)(ptr + cur);
 940                         name_len = btrfs_inode_ref_name_len(eb, iref);
 941                         name_off = (unsigned long)(iref + 1);
 942                         index = btrfs_inode_ref_index(eb, iref);
 943                         dir = found_key->offset;
 944                 } else {
 945                         extref = (struct btrfs_inode_extref *)(ptr + cur);
 946                         name_len = btrfs_inode_extref_name_len(eb, extref);
 947                         name_off = (unsigned long)&extref->name;
 948                         index = btrfs_inode_extref_index(eb, extref);
 949                         dir = btrfs_inode_extref_parent(eb, extref);
 950                 }
 951 
 952                 if (resolve) {
 953                         start = btrfs_ref_to_path(root, tmp_path, name_len,
 954                                                   name_off, eb, dir,
 955                                                   p->buf, p->buf_len);
 956                         if (IS_ERR(start)) {
 957                                 ret = PTR_ERR(start);
 958                                 goto out;
 959                         }
 960                         if (start < p->buf) {
 961                                 /* overflow , try again with larger buffer */
 962                                 ret = fs_path_ensure_buf(p,
 963                                                 p->buf_len + p->buf - start);
 964                                 if (ret < 0)
 965                                         goto out;
 966                                 start = btrfs_ref_to_path(root, tmp_path,
 967                                                           name_len, name_off,
 968                                                           eb, dir,
 969                                                           p->buf, p->buf_len);
 970                                 if (IS_ERR(start)) {
 971                                         ret = PTR_ERR(start);
 972                                         goto out;
 973                                 }
 974                                 BUG_ON(start < p->buf);
 975                         }
 976                         p->start = start;
 977                 } else {
 978                         ret = fs_path_add_from_extent_buffer(p, eb, name_off,
 979                                                              name_len);
 980                         if (ret < 0)
 981                                 goto out;
 982                 }
 983 
 984                 cur += elem_size + name_len;
 985                 ret = iterate(num, dir, index, p, ctx);
 986                 if (ret)
 987                         goto out;
 988                 num++;
 989         }
 990 
 991 out:
 992         btrfs_free_path(tmp_path);
 993         fs_path_free(p);
 994         return ret;
 995 }
 996 
/*
 * Callback type for iterate_dir_item(): called once per dir entry with the
 * entry's ordinal (num), key, name, optional value data (used for xattrs)
 * and file type. A non-zero return stops the iteration (negative errno, or
 * 1 to just stop).
 */
typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
                                  const char *name, int name_len,
                                  const char *data, int data_len,
                                  u8 type, void *ctx);
1001 
/*
 * Helper function to iterate the entries in ONE btrfs_dir_item.
 * The iterate callback may return a non zero value to stop iteration. This can
 * be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the dir item when called.
 */
static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
			    iterate_dir_item_t iterate, void *ctx)
{
	int ret = 0;
	struct extent_buffer *eb;
	struct btrfs_item *item;
	struct btrfs_dir_item *di;
	struct btrfs_key di_key;
	char *buf = NULL;
	int buf_len;
	u32 name_len;
	u32 data_len;
	u32 cur;
	u32 len;
	u32 total;
	int slot;
	int num;
	u8 type;

	/*
	 * Start with a small buffer (PATH_MAX bytes). If later we end up
	 * needing more space, which can happen for xattrs on a fs with a
	 * leaf size greater than the page size, attempt to increase the
	 * buffer. Typically xattr values are small.
	 */
	buf_len = PATH_MAX;
	buf = kmalloc(buf_len, GFP_KERNEL);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	eb = path->nodes[0];
	slot = path->slots[0];
	item = btrfs_item_nr(slot);
	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	cur = 0;
	len = 0;
	total = btrfs_item_size(eb, item);

	num = 0;
	while (cur < total) {
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		type = btrfs_dir_type(eb, di);
		btrfs_dir_item_key_to_cpu(eb, di, &di_key);

		/* Validate on-disk lengths before copying the entry out. */
		if (type == BTRFS_FT_XATTR) {
			if (name_len > XATTR_NAME_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
			if (name_len + data_len >
					BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
				ret = -E2BIG;
				goto out;
			}
		} else {
			/*
			 * Path too long
			 */
			if (name_len + data_len > PATH_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
		}

		/*
		 * Grow the buffer if this entry does not fit: try krealloc
		 * first (cheap for kmalloc'ed buffers), and fall back to
		 * kvmalloc, which may satisfy large requests from vmalloc.
		 */
		if (name_len + data_len > buf_len) {
			buf_len = name_len + data_len;
			if (is_vmalloc_addr(buf)) {
				vfree(buf);
				buf = NULL;
			} else {
				char *tmp = krealloc(buf, buf_len,
						GFP_KERNEL | __GFP_NOWARN);

				if (!tmp)
					kfree(buf);
				buf = tmp;
			}
			if (!buf) {
				buf = kvmalloc(buf_len, GFP_KERNEL);
				if (!buf) {
					ret = -ENOMEM;
					goto out;
				}
			}
		}

		/* Name and value are stored back to back after the header. */
		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
				name_len + data_len);

		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;

		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
				data_len, type, ctx);
		if (ret < 0)
			goto out;
		if (ret) {
			/* Callback asked to stop early; not an error. */
			ret = 0;
			goto out;
		}

		num++;
	}

out:
	kvfree(buf);
	return ret;
}
1121 
1122 static int __copy_first_ref(int num, u64 dir, int index,
1123                             struct fs_path *p, void *ctx)
1124 {
1125         int ret;
1126         struct fs_path *pt = ctx;
1127 
1128         ret = fs_path_copy(pt, p);
1129         if (ret < 0)
1130                 return ret;
1131 
1132         /* we want the first only */
1133         return 1;
1134 }
1135 
1136 /*
1137  * Retrieve the first path of an inode. If an inode has more then one
1138  * ref/hardlink, this is ignored.
1139  */
1140 static int get_inode_path(struct btrfs_root *root,
1141                           u64 ino, struct fs_path *path)
1142 {
1143         int ret;
1144         struct btrfs_key key, found_key;
1145         struct btrfs_path *p;
1146 
1147         p = alloc_path_for_send();
1148         if (!p)
1149                 return -ENOMEM;
1150 
1151         fs_path_reset(path);
1152 
1153         key.objectid = ino;
1154         key.type = BTRFS_INODE_REF_KEY;
1155         key.offset = 0;
1156 
1157         ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
1158         if (ret < 0)
1159                 goto out;
1160         if (ret) {
1161                 ret = 1;
1162                 goto out;
1163         }
1164         btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
1165         if (found_key.objectid != ino ||
1166             (found_key.type != BTRFS_INODE_REF_KEY &&
1167              found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1168                 ret = -ENOENT;
1169                 goto out;
1170         }
1171 
1172         ret = iterate_inode_ref(root, p, &found_key, 1,
1173                                 __copy_first_ref, path);
1174         if (ret < 0)
1175                 goto out;
1176         ret = 0;
1177 
1178 out:
1179         btrfs_free_path(p);
1180         return ret;
1181 }
1182 
/* Context handed to __iterate_backrefs() while looking for clone sources. */
struct backref_ctx {
	struct send_ctx *sctx;

	/* number of total found references */
	u64 found;

	/*
	 * used for clones found in send_root. clones found behind cur_objectid
	 * and cur_offset are not considered as allowed clones.
	 */
	u64 cur_objectid;
	u64 cur_offset;

	/* may be truncated in case it's the last extent in a file */
	u64 extent_len;

	/* data offset in the file extent item */
	u64 data_offset;

	/* Just to check for bugs in backref resolving */
	int found_itself;
};
1205 
1206 static int __clone_root_cmp_bsearch(const void *key, const void *elt)
1207 {
1208         u64 root = (u64)(uintptr_t)key;
1209         struct clone_root *cr = (struct clone_root *)elt;
1210 
1211         if (root < cr->root->root_key.objectid)
1212                 return -1;
1213         if (root > cr->root->root_key.objectid)
1214                 return 1;
1215         return 0;
1216 }
1217 
1218 static int __clone_root_cmp_sort(const void *e1, const void *e2)
1219 {
1220         struct clone_root *cr1 = (struct clone_root *)e1;
1221         struct clone_root *cr2 = (struct clone_root *)e2;
1222 
1223         if (cr1->root->root_key.objectid < cr2->root->root_key.objectid)
1224                 return -1;
1225         if (cr1->root->root_key.objectid > cr2->root->root_key.objectid)
1226                 return 1;
1227         return 0;
1228 }
1229 
/*
 * Called for every backref that is found for the current extent.
 * Results are collected in sctx->clone_roots->ino/offset/found_refs
 */
static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
{
	struct backref_ctx *bctx = ctx_;
	struct clone_root *found;

	/* First check if the root is in the list of accepted clone sources */
	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
			bctx->sctx->clone_roots_cnt,
			sizeof(struct clone_root),
			__clone_root_cmp_bsearch);
	if (!found)
		return 0;

	/* Record that backref walking found the extent we started from. */
	if (found->root == bctx->sctx->send_root &&
	    ino == bctx->cur_objectid &&
	    offset == bctx->cur_offset) {
		bctx->found_itself = 1;
	}

	/*
	 * Make sure we don't consider clones from send_root that are
	 * behind the current inode/offset.
	 */
	if (found->root == bctx->sctx->send_root) {
		/*
		 * TODO for the moment we don't accept clones from the inode
		 * that is currently send. We may change this when
		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
		 * file.
		 */
		if (ino >= bctx->cur_objectid)
			return 0;
	}

	bctx->found++;
	found->found_refs++;
	if (ino < found->ino) {
		/* Prefer the lowest inode number as the clone source. */
		found->ino = ino;
		found->offset = offset;
	} else if (found->ino == ino) {
		/*
		 * same extent found more then once in the same file.
		 */
		if (found->offset > offset + bctx->extent_len)
			found->offset = offset;
	}

	return 0;
}
1283 
/*
 * Given an inode, offset and extent item, it finds a good clone for a clone
 * instruction. Returns -ENOENT when none could be found. The function makes
 * sure that the returned clone is usable at the point where sending is at the
 * moment. This means, that no clones are accepted which lie behind the current
 * inode+offset.
 *
 * path must point to the extent item when called.
 */
static int find_extent_clone(struct send_ctx *sctx,
			     struct btrfs_path *path,
			     u64 ino, u64 data_offset,
			     u64 ino_size,
			     struct clone_root **found)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;
	int extent_type;
	u64 logical;
	u64 disk_byte;
	u64 num_bytes;
	u64 extent_item_pos;
	u64 flags = 0;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *eb = path->nodes[0];
	struct backref_ctx *backref_ctx = NULL;
	struct clone_root *cur_clone_root;
	struct btrfs_key found_key;
	struct btrfs_path *tmp_path;
	struct btrfs_extent_item *ei;
	int compressed;
	u32 i;

	tmp_path = alloc_path_for_send();
	if (!tmp_path)
		return -ENOMEM;

	/* We only use this path under the commit sem */
	tmp_path->need_commit_sem = 0;

	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL);
	if (!backref_ctx) {
		ret = -ENOMEM;
		goto out;
	}

	if (data_offset >= ino_size) {
		/*
		 * There may be extents that lie behind the file's size.
		 * I at least had this in combination with snapshotting while
		 * writing large files.
		 */
		ret = 0;
		goto out;
	}

	/* Inline extents and holes (disk_bytenr == 0) cannot be cloned. */
	fi = btrfs_item_ptr(eb, path->slots[0],
			struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);
	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		ret = -ENOENT;
		goto out;
	}
	compressed = btrfs_file_extent_compression(eb, fi);

	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
	if (disk_byte == 0) {
		ret = -ENOENT;
		goto out;
	}
	logical = disk_byte + btrfs_file_extent_offset(eb, fi);

	/* Map the disk byte back to its extent item in the extent tree. */
	down_read(&fs_info->commit_root_sem);
	ret = extent_from_logical(fs_info, disk_byte, tmp_path,
				  &found_key, &flags);
	up_read(&fs_info->commit_root_sem);

	if (ret < 0)
		goto out;
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		/* A data extent must never resolve to a tree block. */
		ret = -EIO;
		goto out;
	}

	ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0],
			    struct btrfs_extent_item);
	/*
	 * Backreference walking (iterate_extent_inodes() below) is currently
	 * too expensive when an extent has a large number of references, both
	 * in time spent and used memory. So for now just fallback to write
	 * operations instead of clone operations when an extent has more than
	 * a certain amount of references.
	 */
	if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) {
		ret = -ENOENT;
		goto out;
	}
	btrfs_release_path(tmp_path);

	/*
	 * Setup the clone roots.
	 */
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		cur_clone_root = sctx->clone_roots + i;
		cur_clone_root->ino = (u64)-1;
		cur_clone_root->offset = 0;
		cur_clone_root->found_refs = 0;
	}

	backref_ctx->sctx = sctx;
	backref_ctx->found = 0;
	backref_ctx->cur_objectid = ino;
	backref_ctx->cur_offset = data_offset;
	backref_ctx->found_itself = 0;
	backref_ctx->extent_len = num_bytes;
	/*
	 * For non-compressed extents iterate_extent_inodes() gives us extent
	 * offsets that already take into account the data offset, but not for
	 * compressed extents, since the offset is logical and not relative to
	 * the physical extent locations. We must take this into account to
	 * avoid sending clone offsets that go beyond the source file's size,
	 * which would result in the clone ioctl failing with -EINVAL on the
	 * receiving end.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		backref_ctx->data_offset = 0;
	else
		backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi);

	/*
	 * The last extent of a file may be too large due to page alignment.
	 * We need to adjust extent_len in this case so that the checks in
	 * __iterate_backrefs work.
	 */
	if (data_offset + num_bytes >= ino_size)
		backref_ctx->extent_len = ino_size - data_offset;

	/*
	 * Now collect all backrefs.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		extent_item_pos = logical - found_key.objectid;
	else
		extent_item_pos = 0;
	ret = iterate_extent_inodes(fs_info, found_key.objectid,
				    extent_item_pos, 1, __iterate_backrefs,
				    backref_ctx, false);

	if (ret < 0)
		goto out;

	if (!backref_ctx->found_itself) {
		/* found a bug in backref code? */
		ret = -EIO;
		btrfs_err(fs_info,
			  "did not find backref in send_root. inode=%llu, offset=%llu, disk_byte=%llu found extent=%llu",
			  ino, data_offset, disk_byte, found_key.objectid);
		goto out;
	}

	btrfs_debug(fs_info,
		    "find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
		    data_offset, ino, num_bytes, logical);

	if (!backref_ctx->found)
		btrfs_debug(fs_info, "no clones found");

	/* Pick a clone source, preferring one from send_root. */
	cur_clone_root = NULL;
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		if (sctx->clone_roots[i].found_refs) {
			if (!cur_clone_root)
				cur_clone_root = sctx->clone_roots + i;
			else if (sctx->clone_roots[i].root == sctx->send_root)
				/* prefer clones from send_root over others */
				cur_clone_root = sctx->clone_roots + i;
		}

	}

	if (cur_clone_root) {
		*found = cur_clone_root;
		ret = 0;
	} else {
		ret = -ENOENT;
	}

out:
	btrfs_free_path(tmp_path);
	kfree(backref_ctx);
	return ret;
}
1476 
1477 static int read_symlink(struct btrfs_root *root,
1478                         u64 ino,
1479                         struct fs_path *dest)
1480 {
1481         int ret;
1482         struct btrfs_path *path;
1483         struct btrfs_key key;
1484         struct btrfs_file_extent_item *ei;
1485         u8 type;
1486         u8 compression;
1487         unsigned long off;
1488         int len;
1489 
1490         path = alloc_path_for_send();
1491         if (!path)
1492                 return -ENOMEM;
1493 
1494         key.objectid = ino;
1495         key.type = BTRFS_EXTENT_DATA_KEY;
1496         key.offset = 0;
1497         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1498         if (ret < 0)
1499                 goto out;
1500         if (ret) {
1501                 /*
1502                  * An empty symlink inode. Can happen in rare error paths when
1503                  * creating a symlink (transaction committed before the inode
1504                  * eviction handler removed the symlink inode items and a crash
1505                  * happened in between or the subvol was snapshoted in between).
1506                  * Print an informative message to dmesg/syslog so that the user
1507                  * can delete the symlink.
1508                  */
1509                 btrfs_err(root->fs_info,
1510                           "Found empty symlink inode %llu at root %llu",
1511                           ino, root->root_key.objectid);
1512                 ret = -EIO;
1513                 goto out;
1514         }
1515 
1516         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1517                         struct btrfs_file_extent_item);
1518         type = btrfs_file_extent_type(path->nodes[0], ei);
1519         compression = btrfs_file_extent_compression(path->nodes[0], ei);
1520         BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
1521         BUG_ON(compression);
1522 
1523         off = btrfs_file_extent_inline_start(ei);
1524         len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
1525 
1526         ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
1527 
1528 out:
1529         btrfs_free_path(path);
1530         return ret;
1531 }
1532 
1533 /*
1534  * Helper function to generate a file name that is unique in the root of
1535  * send_root and parent_root. This is used to generate names for orphan inodes.
1536  */
1537 static int gen_unique_name(struct send_ctx *sctx,
1538                            u64 ino, u64 gen,
1539                            struct fs_path *dest)
1540 {
1541         int ret = 0;
1542         struct btrfs_path *path;
1543         struct btrfs_dir_item *di;
1544         char tmp[64];
1545         int len;
1546         u64 idx = 0;
1547 
1548         path = alloc_path_for_send();
1549         if (!path)
1550                 return -ENOMEM;
1551 
1552         while (1) {
1553                 len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
1554                                 ino, gen, idx);
1555                 ASSERT(len < sizeof(tmp));
1556 
1557                 di = btrfs_lookup_dir_item(NULL, sctx->send_root,
1558                                 path, BTRFS_FIRST_FREE_OBJECTID,
1559                                 tmp, strlen(tmp), 0);
1560                 btrfs_release_path(path);
1561                 if (IS_ERR(di)) {
1562                         ret = PTR_ERR(di);
1563                         goto out;
1564                 }
1565                 if (di) {
1566                         /* not unique, try again */
1567                         idx++;
1568                         continue;
1569                 }
1570 
1571                 if (!sctx->parent_root) {
1572                         /* unique */
1573                         ret = 0;
1574                         break;
1575                 }
1576 
1577                 di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
1578                                 path, BTRFS_FIRST_FREE_OBJECTID,
1579                                 tmp, strlen(tmp), 0);
1580                 btrfs_release_path(path);
1581                 if (IS_ERR(di)) {
1582                         ret = PTR_ERR(di);
1583                         goto out;
1584                 }
1585                 if (di) {
1586                         /* not unique, try again */
1587                         idx++;
1588                         continue;
1589                 }
1590                 /* unique */
1591                 break;
1592         }
1593 
1594         ret = fs_path_add(dest, tmp, strlen(tmp));
1595 
1596 out:
1597         btrfs_free_path(path);
1598         return ret;
1599 }
1600 
/*
 * States of an inode relative to the parent and send snapshots, as computed
 * by get_cur_inode_state(). "will_*" means the change happens at an inode
 * number send progress has not reached yet, "did_*" that it already passed.
 */
enum inode_state {
	inode_state_no_change,
	inode_state_will_create,
	inode_state_did_create,
	inode_state_will_delete,
	inode_state_did_delete,
};
1608 
/*
 * Determine what is happening to an inode (given by number and generation)
 * between the parent snapshot ("right") and the send snapshot ("left"):
 * unchanged, (re)created or deleted, and whether send progress already
 * passed the inode number where that happens.
 *
 * Returns one of the inode_state_* values, or a negative errno (-ENOENT if
 * the given generation matches neither side).
 */
static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;
	int left_ret;
	int right_ret;
	u64 left_gen;
	u64 right_gen;

	/* Lookup in the send snapshot; -ENOENT just means "not there". */
	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
			NULL, NULL);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	left_ret = ret;

	/* Lookup in the parent snapshot, if this is an incremental send. */
	if (!sctx->parent_root) {
		right_ret = -ENOENT;
	} else {
		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
				NULL, NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		right_ret = ret;
	}

	/*
	 * left_gen/right_gen are only assigned when the corresponding lookup
	 * succeeded; each branch below only reads the generation of a side
	 * whose lookup returned 0.
	 */
	if (!left_ret && !right_ret) {
		if (left_gen == gen && right_gen == gen) {
			/* Same inode exists in both snapshots. */
			ret = inode_state_no_change;
		} else if (left_gen == gen) {
			/* Inode number was reused: (re)created on the left. */
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else if (right_gen == gen) {
			/* Only the parent snapshot matches: being deleted. */
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else  {
			ret = -ENOENT;
		}
	} else if (!left_ret) {
		if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else {
			ret = -ENOENT;
		}
	} else if (!right_ret) {
		if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else {
		ret = -ENOENT;
	}

out:
	return ret;
}
1674 
1675 static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
1676 {
1677         int ret;
1678 
1679         if (ino == BTRFS_FIRST_FREE_OBJECTID)
1680                 return 1;
1681 
1682         ret = get_cur_inode_state(sctx, ino, gen);
1683         if (ret < 0)
1684                 goto out;
1685 
1686         if (ret == inode_state_no_change ||
1687             ret == inode_state_did_create ||
1688             ret == inode_state_will_delete)
1689                 ret = 1;
1690         else
1691                 ret = 0;
1692 
1693 out:
1694         return ret;
1695 }
1696 
1697 /*
1698  * Helper function to lookup a dir item in a dir.
1699  */
1700 static int lookup_dir_item_inode(struct btrfs_root *root,
1701                                  u64 dir, const char *name, int name_len,
1702                                  u64 *found_inode,
1703                                  u8 *found_type)
1704 {
1705         int ret = 0;
1706         struct btrfs_dir_item *di;
1707         struct btrfs_key key;
1708         struct btrfs_path *path;
1709 
1710         path = alloc_path_for_send();
1711         if (!path)
1712                 return -ENOMEM;
1713 
1714         di = btrfs_lookup_dir_item(NULL, root, path,
1715                         dir, name, name_len, 0);
1716         if (IS_ERR_OR_NULL(di)) {
1717                 ret = di ? PTR_ERR(di) : -ENOENT;
1718                 goto out;
1719         }
1720         btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
1721         if (key.type == BTRFS_ROOT_ITEM_KEY) {
1722                 ret = -ENOENT;
1723                 goto out;
1724         }
1725         *found_inode = key.objectid;
1726         *found_type = btrfs_dir_type(path->nodes[0], di);
1727 
1728 out:
1729         btrfs_free_path(path);
1730         return ret;
1731 }
1732 
1733 /*
1734  * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
1735  * generation of the parent dir and the name of the dir entry.
1736  */
static int get_first_ref(struct btrfs_root *root, u64 ino,
			 u64 *dir, u64 *dir_gen, struct fs_path *name)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	int len;
	u64 parent_dir;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	/* Search for the first ref item (regular or extended) of the inode. */
	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
	if (ret < 0)
		goto out;
	if (!ret)
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				path->slots[0]);
	/*
	 * ret > 0 means nothing was found at or after the search key; also
	 * verify the item found really is a ref item of this inode.
	 */
	if (ret || found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	if (found_key.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *iref;
		/* Regular ref: the name follows the item header, the parent
		 * dir's inode number is the key offset. */
		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_inode_ref);
		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
						     (unsigned long)(iref + 1),
						     len);
		parent_dir = found_key.offset;
	} else {
		struct btrfs_inode_extref *extref;
		/* Extended ref: name and parent dir are stored in the item. */
		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					struct btrfs_inode_extref);
		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
					(unsigned long)&extref->name, len);
		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
	}
	if (ret < 0)
		goto out;
	/* Release the path before doing another tree lookup below. */
	btrfs_release_path(path);

	if (dir_gen) {
		ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0)
			goto out;
	}

	*dir = parent_dir;

out:
	btrfs_free_path(path);
	return ret;
}
1803 
1804 static int is_first_ref(struct btrfs_root *root,
1805                         u64 ino, u64 dir,
1806                         const char *name, int name_len)
1807 {
1808         int ret;
1809         struct fs_path *tmp_name;
1810         u64 tmp_dir;
1811 
1812         tmp_name = fs_path_alloc();
1813         if (!tmp_name)
1814                 return -ENOMEM;
1815 
1816         ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
1817         if (ret < 0)
1818                 goto out;
1819 
1820         if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
1821                 ret = 0;
1822                 goto out;
1823         }
1824 
1825         ret = !memcmp(tmp_name->start, name, name_len);
1826 
1827 out:
1828         fs_path_free(tmp_name);
1829         return ret;
1830 }
1831 
1832 /*
1833  * Used by process_recorded_refs to determine if a new ref would overwrite an
1834  * already existing ref. In case it detects an overwrite, it returns the
1835  * inode/gen in who_ino/who_gen.
1836  * When an overwrite is detected, process_recorded_refs does proper orphanizing
1837  * to make sure later references to the overwritten inode are possible.
1838  * Orphanizing is however only required for the first ref of an inode.
1839  * process_recorded_refs does an additional is_first_ref check to see if
1840  * orphanizing is really required.
1841  */
1842 static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1843                               const char *name, int name_len,
1844                               u64 *who_ino, u64 *who_gen, u64 *who_mode)
1845 {
1846         int ret = 0;
1847         u64 gen;
1848         u64 other_inode = 0;
1849         u8 other_type = 0;
1850 
1851         if (!sctx->parent_root)
1852                 goto out;
1853 
1854         ret = is_inode_existent(sctx, dir, dir_gen);
1855         if (ret <= 0)
1856                 goto out;
1857 
1858         /*
1859          * If we have a parent root we need to verify that the parent dir was
1860          * not deleted and then re-created, if it was then we have no overwrite
1861          * and we can just unlink this entry.
1862          */
1863         if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID) {
1864                 ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
1865                                      NULL, NULL, NULL);
1866                 if (ret < 0 && ret != -ENOENT)
1867                         goto out;
1868                 if (ret) {
1869                         ret = 0;
1870                         goto out;
1871                 }
1872                 if (gen != dir_gen)
1873                         goto out;
1874         }
1875 
1876         ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
1877                         &other_inode, &other_type);
1878         if (ret < 0 && ret != -ENOENT)
1879                 goto out;
1880         if (ret) {
1881                 ret = 0;
1882                 goto out;
1883         }
1884 
1885         /*
1886          * Check if the overwritten ref was already processed. If yes, the ref
1887          * was already unlinked/moved, so we can safely assume that we will not
1888          * overwrite anything at this point in time.
1889          */
1890         if (other_inode > sctx->send_progress ||
1891             is_waiting_for_move(sctx, other_inode)) {
1892                 ret = get_inode_info(sctx->parent_root, other_inode, NULL,
1893                                 who_gen, who_mode, NULL, NULL, NULL);
1894                 if (ret < 0)
1895                         goto out;
1896 
1897                 ret = 1;
1898                 *who_ino = other_inode;
1899         } else {
1900                 ret = 0;
1901         }
1902 
1903 out:
1904         return ret;
1905 }
1906 
1907 /*
1908  * Checks if the ref was overwritten by an already processed inode. This is
1909  * used by __get_cur_name_and_parent to find out if the ref was orphanized and
1910  * thus the orphan name needs be used.
1911  * process_recorded_refs also uses it to avoid unlinking of refs that were
1912  * overwritten.
1913  */
static int did_overwrite_ref(struct send_ctx *sctx,
			    u64 dir, u64 dir_gen,
			    u64 ino, u64 ino_gen,
			    const char *name, int name_len)
{
	int ret = 0;
	u64 gen;
	u64 ow_inode;
	u8 other_type;

	/* Without a parent snapshot nothing could have been overwritten. */
	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	if (dir != BTRFS_FIRST_FREE_OBJECTID) {
		/*
		 * If the dir is gone from the send snapshot or its generation
		 * changed (deleted and re-created), report no overwrite.
		 */
		ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}
		if (gen != dir_gen)
			goto out;
	}

	/* check if the ref was overwritten by another ref */
	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
			&ow_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		/* was never and will never be overwritten */
		ret = 0;
		goto out;
	}

	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
			NULL, NULL);
	if (ret < 0)
		goto out;

	/* Same inode and generation: the name still belongs to 'ino' itself. */
	if (ow_inode == ino && gen == ino_gen) {
		ret = 0;
		goto out;
	}

	/*
	 * We know that it is or will be overwritten. Check this now.
	 * The current inode being processed might have been the one that caused
	 * inode 'ino' to be orphanized, therefore check if ow_inode matches
	 * the current inode being processed.
	 */
	if ((ow_inode < sctx->send_progress) ||
	    (ino != sctx->cur_ino && ow_inode == sctx->cur_ino &&
	     gen == sctx->cur_inode_gen))
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}
1981 
1982 /*
1983  * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
1984  * that got overwritten. This is used by process_recorded_refs to determine
1985  * if it has to use the path as returned by get_cur_path or the orphan name.
1986  */
1987 static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
1988 {
1989         int ret = 0;
1990         struct fs_path *name = NULL;
1991         u64 dir;
1992         u64 dir_gen;
1993 
1994         if (!sctx->parent_root)
1995                 goto out;
1996 
1997         name = fs_path_alloc();
1998         if (!name)
1999                 return -ENOMEM;
2000 
2001         ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
2002         if (ret < 0)
2003                 goto out;
2004 
2005         ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
2006                         name->start, fs_path_len(name));
2007 
2008 out:
2009         fs_path_free(name);
2010         return ret;
2011 }
2012 
2013 /*
2014  * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
2015  * so we need to do some special handling in case we have clashes. This function
2016  * takes care of this with the help of name_cache_entry::radix_list.
2017  * In case of error, nce is kfreed.
2018  */
2019 static int name_cache_insert(struct send_ctx *sctx,
2020                              struct name_cache_entry *nce)
2021 {
2022         int ret = 0;
2023         struct list_head *nce_head;
2024 
2025         nce_head = radix_tree_lookup(&sctx->name_cache,
2026                         (unsigned long)nce->ino);
2027         if (!nce_head) {
2028                 nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
2029                 if (!nce_head) {
2030                         kfree(nce);
2031                         return -ENOMEM;
2032                 }
2033                 INIT_LIST_HEAD(nce_head);
2034 
2035                 ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
2036                 if (ret < 0) {
2037                         kfree(nce_head);
2038                         kfree(nce);
2039                         return ret;
2040                 }
2041         }
2042         list_add_tail(&nce->radix_list, nce_head);
2043         list_add_tail(&nce->list, &sctx->name_cache_list);
2044         sctx->name_cache_size++;
2045 
2046         return ret;
2047 }
2048 
/*
 * Remove a name cache entry from both the per-ino radix tree bucket and the
 * LRU list. The caller frees the entry itself.
 */
static void name_cache_delete(struct send_ctx *sctx,
			      struct name_cache_entry *nce)
{
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
			(unsigned long)nce->ino);
	if (!nce_head) {
		/* Should never happen: entry present but its bucket missing. */
		btrfs_err(sctx->send_root->fs_info,
	      "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
			nce->ino, sctx->name_cache_size);
	}

	list_del(&nce->radix_list);
	list_del(&nce->list);
	sctx->name_cache_size--;

	/*
	 * We may not get to the final release of nce_head if the lookup fails
	 */
	if (nce_head && list_empty(nce_head)) {
		radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
		kfree(nce_head);
	}
}
2074 
2075 static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
2076                                                     u64 ino, u64 gen)
2077 {
2078         struct list_head *nce_head;
2079         struct name_cache_entry *cur;
2080 
2081         nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
2082         if (!nce_head)
2083                 return NULL;
2084 
2085         list_for_each_entry(cur, nce_head, radix_list) {
2086                 if (cur->ino == ino && cur->gen == gen)
2087                         return cur;
2088         }
2089         return NULL;
2090 }
2091 
2092 /*
2093  * Removes the entry from the list and adds it back to the end. This marks the
2094  * entry as recently used so that name_cache_clean_unused does not remove it.
2095  */
2096 static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
2097 {
2098         list_del(&nce->list);
2099         list_add_tail(&nce->list, &sctx->name_cache_list);
2100 }
2101 
2102 /*
2103  * Remove some entries from the beginning of name_cache_list.
2104  */
2105 static void name_cache_clean_unused(struct send_ctx *sctx)
2106 {
2107         struct name_cache_entry *nce;
2108 
2109         if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
2110                 return;
2111 
2112         while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
2113                 nce = list_entry(sctx->name_cache_list.next,
2114                                 struct name_cache_entry, list);
2115                 name_cache_delete(sctx, nce);
2116                 kfree(nce);
2117         }
2118 }
2119 
2120 static void name_cache_free(struct send_ctx *sctx)
2121 {
2122         struct name_cache_entry *nce;
2123 
2124         while (!list_empty(&sctx->name_cache_list)) {
2125                 nce = list_entry(sctx->name_cache_list.next,
2126                                 struct name_cache_entry, list);
2127                 name_cache_delete(sctx, nce);
2128                 kfree(nce);
2129         }
2130 }
2131 
2132 /*
2133  * Used by get_cur_path for each ref up to the root.
2134  * Returns 0 if it succeeded.
2135  * Returns 1 if the inode is not existent or got overwritten. In that case, the
2136  * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
2137  * is returned, parent_ino/parent_gen are not guaranteed to be valid.
2138  * Returns <0 in case of error.
2139  */
static int __get_cur_name_and_parent(struct send_ctx *sctx,
				     u64 ino, u64 gen,
				     u64 *parent_ino,
				     u64 *parent_gen,
				     struct fs_path *dest)
{
	int ret;
	int nce_ret;
	struct name_cache_entry *nce = NULL;

	/*
	 * First check if we already did a call to this function with the same
	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
	 * return the cached result.
	 */
	nce = name_cache_search(sctx, ino, gen);
	if (nce) {
		if (ino < sctx->send_progress && nce->need_later_update) {
			/* Stale: the inode got processed since we cached it. */
			name_cache_delete(sctx, nce);
			kfree(nce);
			nce = NULL;
		} else {
			name_cache_used(sctx, nce);
			*parent_ino = nce->parent_ino;
			*parent_gen = nce->parent_gen;
			ret = fs_path_add(dest, nce->name, nce->name_len);
			if (ret < 0)
				goto out;
			/* Replay the 0/1 result of the original lookup. */
			ret = nce->ret;
			goto out;
		}
	}

	/*
	 * If the inode is not existent yet, add the orphan name and return 1.
	 * This should only happen for the parent dir that we determine in
	 * __record_new_ref
	 */
	ret = is_inode_existent(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (!ret) {
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
		goto out_cache;
	}

	/*
	 * Depending on whether the inode was already processed or not, use
	 * send_root or parent_root for ref lookup.
	 */
	if (ino < sctx->send_progress)
		ret = get_first_ref(sctx->send_root, ino,
				    parent_ino, parent_gen, dest);
	else
		ret = get_first_ref(sctx->parent_root, ino,
				    parent_ino, parent_gen, dest);
	if (ret < 0)
		goto out;

	/*
	 * Check if the ref was overwritten by an inode's ref that was processed
	 * earlier. If yes, treat as orphan and return 1.
	 */
	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
			dest->start, dest->end - dest->start);
	if (ret < 0)
		goto out;
	if (ret) {
		/* Discard the real name and use the orphan name instead. */
		fs_path_reset(dest);
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
	}

out_cache:
	/*
	 * Store the result of the lookup in the name cache.
	 * The +1 makes room for the terminating NUL copied by strcpy below.
	 */
	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
	if (!nce) {
		ret = -ENOMEM;
		goto out;
	}

	nce->ino = ino;
	nce->gen = gen;
	nce->parent_ino = *parent_ino;
	nce->parent_gen = *parent_gen;
	nce->name_len = fs_path_len(dest);
	nce->ret = ret;
	strcpy(nce->name, dest->start);

	/* Entries cached before their inode is processed must be refreshed. */
	if (ino < sctx->send_progress)
		nce->need_later_update = 0;
	else
		nce->need_later_update = 1;

	nce_ret = name_cache_insert(sctx, nce);
	if (nce_ret < 0)
		ret = nce_ret;
	name_cache_clean_unused(sctx);

out:
	return ret;
}
2250 
2251 /*
2252  * Magic happens here. This function returns the first ref to an inode as it
2253  * would look like while receiving the stream at this point in time.
2254  * We walk the path up to the root. For every inode in between, we check if it
2255  * was already processed/sent. If yes, we continue with the parent as found
2256  * in send_root. If not, we continue with the parent as found in parent_root.
2257  * If we encounter an inode that was deleted at this point in time, we use the
2258  * inodes "orphan" name instead of the real name and stop. Same with new inodes
2259  * that were not created yet and overwritten inodes/refs.
2260  *
2261  * When do we have orphan inodes:
2262  * 1. When an inode is freshly created and thus no valid refs are available yet
 * 2. When a directory lost all its refs (deleted) but still has dir items
2264  *    inside which were not processed yet (pending for move/delete). If anyone
2265  *    tried to get the path to the dir items, it would get a path inside that
2266  *    orphan directory.
2267  * 3. When an inode is moved around or gets new links, it may overwrite the ref
2268  *    of an unprocessed inode. If in that case the first ref would be
2269  *    overwritten, the overwritten inode gets "orphanized". Later when we
2270  *    process this overwritten inode, it is restored at a new place by moving
2271  *    the orphan inode.
2272  *
2273  * sctx->send_progress tells this function at which point in time receiving
2274  * would be.
2275  */
static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
			struct fs_path *dest)
{
	int ret = 0;
	struct fs_path *name = NULL;
	u64 parent_inode = 0;
	u64 parent_gen = 0;
	int stop = 0;

	name = fs_path_alloc();
	if (!name) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * The path is built leaf-to-root: components are accumulated in
	 * reversed mode and fixed up by fs_path_unreverse() at the end.
	 */
	dest->reversed = 1;
	fs_path_reset(dest);

	while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
		struct waiting_dir_move *wdm;

		fs_path_reset(name);

		/* A dir pending deletion keeps its orphan name (case 2). */
		if (is_waiting_for_rm(sctx, ino)) {
			ret = gen_unique_name(sctx, ino, gen, name);
			if (ret < 0)
				goto out;
			ret = fs_path_add_path(dest, name);
			break;
		}

		wdm = get_waiting_dir_move(sctx, ino);
		if (wdm && wdm->orphanized) {
			/* Orphanized and pending move: use the orphan name. */
			ret = gen_unique_name(sctx, ino, gen, name);
			stop = 1;
		} else if (wdm) {
			/* Pending move: the receiver still has the old ref. */
			ret = get_first_ref(sctx->parent_root, ino,
					    &parent_inode, &parent_gen, name);
		} else {
			ret = __get_cur_name_and_parent(sctx, ino, gen,
							&parent_inode,
							&parent_gen, name);
			/* ret == 1 means orphan name; stop walking upwards. */
			if (ret)
				stop = 1;
		}

		if (ret < 0)
			goto out;

		ret = fs_path_add_path(dest, name);
		if (ret < 0)
			goto out;

		ino = parent_inode;
		gen = parent_gen;
	}

out:
	fs_path_free(name);
	if (!ret)
		fs_path_unreverse(dest);
	return ret;
}
2339 
2340 /*
2341  * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
2342  */
static int send_subvol_begin(struct send_ctx *sctx)
{
	int ret;
	struct btrfs_root *send_root = sctx->send_root;
	struct btrfs_root *parent_root = sctx->parent_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_root_ref *ref;
	struct extent_buffer *leaf;
	char *name = NULL;
	int namelen;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
	if (!name) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	/* Find the backref of the subvolume in the tree root to get its name. */
	key.objectid = send_root->root_key.objectid;
	key.type = BTRFS_ROOT_BACKREF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
				&key, path, 1, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.type != BTRFS_ROOT_BACKREF_KEY ||
	    key.objectid != send_root->root_key.objectid) {
		ret = -ENOENT;
		goto out;
	}
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
	namelen = btrfs_root_ref_name_len(leaf, ref);
	read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
	btrfs_release_path(path);

	/* With a parent root this is an incremental send (a snapshot). */
	if (parent_root) {
		ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
		if (ret < 0)
			goto out;
	} else {
		ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
		if (ret < 0)
			goto out;
	}

	/* The TLV_PUT_* macros jump to tlv_put_failure on error. */
	TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);

	/* Prefer the received UUID so chains of send/receive keep working. */
	if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
		TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
			    sctx->send_root->root_item.received_uuid);
	else
		TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
			    sctx->send_root->root_item.uuid);

	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
		    le64_to_cpu(sctx->send_root->root_item.ctransid));
	if (parent_root) {
		if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
			TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
				     parent_root->root_item.received_uuid);
		else
			TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
				     parent_root->root_item.uuid);
		TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
			    le64_to_cpu(sctx->parent_root->root_item.ctransid));
	}

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	btrfs_free_path(path);
	kfree(name);
	return ret;
}
2430 
2431 static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
2432 {
2433         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
2434         int ret = 0;
2435         struct fs_path *p;
2436 
2437         btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size);
2438 
2439         p = fs_path_alloc();
2440         if (!p)
2441                 return -ENOMEM;
2442 
2443         ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
2444         if (ret < 0)
2445                 goto out;
2446 
2447         ret = get_cur_path(sctx, ino, gen, p);
2448         if (ret < 0)
2449                 goto out;
2450         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2451         TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
2452 
2453         ret = send_cmd(sctx);
2454 
2455 tlv_put_failure:
2456 out:
2457         fs_path_free(p);
2458         return ret;
2459 }
2460 
2461 static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
2462 {
2463         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
2464         int ret = 0;
2465         struct fs_path *p;
2466 
2467         btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode);
2468 
2469         p = fs_path_alloc();
2470         if (!p)
2471                 return -ENOMEM;
2472 
2473         ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
2474         if (ret < 0)
2475                 goto out;
2476 
2477         ret = get_cur_path(sctx, ino, gen, p);
2478         if (ret < 0)
2479                 goto out;
2480         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2481         TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
2482 
2483         ret = send_cmd(sctx);
2484 
2485 tlv_put_failure:
2486 out:
2487         fs_path_free(p);
2488         return ret;
2489 }
2490 
/*
 * Send a BTRFS_SEND_C_CHOWN command for inode @ino, carrying the new owner
 * @uid and group @gid.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret = 0;
	struct fs_path *p;

	btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu",
		    ino, uid, gid);

	p = fs_path_alloc();
	if (!p)
		return -ENOMEM;

	ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
	if (ret < 0)
		goto out;

	/* Resolve the path the receiver currently knows the inode by. */
	ret = get_cur_path(sctx, ino, gen, p);
	if (ret < 0)
		goto out;
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);

	ret = send_cmd(sctx);

tlv_put_failure:	/* jumped to by the TLV_PUT_* macros on error */
out:
	fs_path_free(p);
	return ret;
}
2522 
/*
 * Send a BTRFS_SEND_C_UTIMES command carrying the atime/mtime/ctime stored
 * in the inode item of @ino in the send root.
 *
 * Returns 0 on success, -ENOENT if the inode item is not found, or another
 * negative errno on failure.
 */
static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret = 0;
	struct fs_path *p = NULL;
	struct btrfs_inode_item *ii;
	struct btrfs_path *path = NULL;
	struct extent_buffer *eb;
	struct btrfs_key key;
	int slot;

	btrfs_debug(fs_info, "send_utimes %llu", ino);

	p = fs_path_alloc();
	if (!p)
		return -ENOMEM;

	path = alloc_path_for_send();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* Look up the inode item to read the timestamps from. */
	key.objectid = ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
	if (ret > 0)
		ret = -ENOENT;
	if (ret < 0)
		goto out;

	eb = path->nodes[0];
	slot = path->slots[0];
	ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);

	ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
	if (ret < 0)
		goto out;

	ret = get_cur_path(sctx, ino, gen, p);
	if (ret < 0)
		goto out;
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
	/* TODO Add otime support when the otime patches get into upstream */

	ret = send_cmd(sctx);

tlv_put_failure:	/* jumped to by the TLV_PUT_* macros on error */
out:
	fs_path_free(p);
	btrfs_free_path(path);
	return ret;
}
2580 
2581 /*
2582  * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
2583  * a valid path yet because we did not process the refs yet. So, the inode
2584  * is created as orphan.
2585  */
2586 static int send_create_inode(struct send_ctx *sctx, u64 ino)
2587 {
2588         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
2589         int ret = 0;
2590         struct fs_path *p;
2591         int cmd;
2592         u64 gen;
2593         u64 mode;
2594         u64 rdev;
2595 
2596         btrfs_debug(fs_info, "send_create_inode %llu", ino);
2597 
2598         p = fs_path_alloc();
2599         if (!p)
2600                 return -ENOMEM;
2601 
2602         if (ino != sctx->cur_ino) {
2603                 ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode,
2604                                      NULL, NULL, &rdev);
2605                 if (ret < 0)
2606                         goto out;
2607         } else {
2608                 gen = sctx->cur_inode_gen;
2609                 mode = sctx->cur_inode_mode;
2610                 rdev = sctx->cur_inode_rdev;
2611         }
2612 
2613         if (S_ISREG(mode)) {
2614                 cmd = BTRFS_SEND_C_MKFILE;
2615         } else if (S_ISDIR(mode)) {
2616                 cmd = BTRFS_SEND_C_MKDIR;
2617         } else if (S_ISLNK(mode)) {
2618                 cmd = BTRFS_SEND_C_SYMLINK;
2619         } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
2620                 cmd = BTRFS_SEND_C_MKNOD;
2621         } else if (S_ISFIFO(mode)) {
2622                 cmd = BTRFS_SEND_C_MKFIFO;
2623         } else if (S_ISSOCK(mode)) {
2624                 cmd = BTRFS_SEND_C_MKSOCK;
2625         } else {
2626                 btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
2627                                 (int)(mode & S_IFMT));
2628                 ret = -EOPNOTSUPP;
2629                 goto out;
2630         }
2631 
2632         ret = begin_cmd(sctx, cmd);
2633         if (ret < 0)
2634                 goto out;
2635 
2636         ret = gen_unique_name(sctx, ino, gen, p);
2637         if (ret < 0)
2638                 goto out;
2639 
2640         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2641         TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
2642 
2643         if (S_ISLNK(mode)) {
2644                 fs_path_reset(p);
2645                 ret = read_symlink(sctx->send_root, ino, p);
2646                 if (ret < 0)
2647                         goto out;
2648                 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
2649         } else if (S_ISCHR(mode) || S_ISBLK(mode) ||
2650                    S_ISFIFO(mode) || S_ISSOCK(mode)) {
2651                 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
2652                 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
2653         }
2654 
2655         ret = send_cmd(sctx);
2656         if (ret < 0)
2657                 goto out;
2658 
2659 
2660 tlv_put_failure:
2661 out:
2662         fs_path_free(p);
2663         return ret;
2664 }
2665 
2666 /*
2667  * We need some special handling for inodes that get processed before the parent
2668  * directory got created. See process_recorded_refs for details.
2669  * This function does the check if we already created the dir out of order.
2670  */
/*
 * Walk all BTRFS_DIR_INDEX_KEY items of directory @dir in the send root.
 * Returns 1 if any entry points to an inode that was already processed
 * (objectid below sctx->send_progress) — meaning the directory itself must
 * already have been created out of order — 0 otherwise, or a negative
 * errno on failure.
 */
static int did_create_dir(struct send_ctx *sctx, u64 dir)
{
	int ret = 0;
	struct btrfs_path *path = NULL;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_key di_key;
	struct extent_buffer *eb;
	struct btrfs_dir_item *di;
	int slot;

	path = alloc_path_for_send();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* Start at the first dir index item of @dir. */
	key.objectid = dir;
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		eb = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(eb)) {
			/* Leaf exhausted, advance to the next one. */
			ret = btrfs_next_leaf(sctx->send_root, path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				/* No more leaves: no processed child found. */
				ret = 0;
				break;
			}
			continue;
		}

		btrfs_item_key_to_cpu(eb, &found_key, slot);
		if (found_key.objectid != key.objectid ||
		    found_key.type != key.type) {
			/* Walked past the dir index items of @dir. */
			ret = 0;
			goto out;
		}

		di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
		btrfs_dir_item_key_to_cpu(eb, di, &di_key);

		/*
		 * A non-subvolume entry pointing to an already-processed
		 * inode proves the directory was created earlier.
		 */
		if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
		    di_key.objectid < sctx->send_progress) {
			ret = 1;
			goto out;
		}

		path->slots[0]++;
	}

out:
	btrfs_free_path(path);
	return ret;
}
2732 
2733 /*
2734  * Only creates the inode if it is:
2735  * 1. Not a directory
2736  * 2. Or a directory which was not created already due to out of order
2737  *    directories. See did_create_dir and process_recorded_refs for details.
2738  */
2739 static int send_create_inode_if_needed(struct send_ctx *sctx)
2740 {
2741         int ret;
2742 
2743         if (S_ISDIR(sctx->cur_inode_mode)) {
2744                 ret = did_create_dir(sctx, sctx->cur_ino);
2745                 if (ret < 0)
2746                         goto out;
2747                 if (ret) {
2748                         ret = 0;
2749                         goto out;
2750                 }
2751         }
2752 
2753         ret = send_create_inode(sctx, sctx->cur_ino);
2754         if (ret < 0)
2755                 goto out;
2756 
2757 out:
2758         return ret;
2759 }
2760 
/*
 * One recorded directory entry (ref) of the inode being processed. Refs
 * are collected first and processed later, see __record_ref().
 */
struct recorded_ref {
	struct list_head list;		/* link in a new/deleted refs list */
	char *name;			/* basename, points into full_path */
	struct fs_path *full_path;	/* full path, owned by this ref */
	u64 dir;			/* parent directory inode number */
	u64 dir_gen;			/* generation of the parent directory */
	int name_len;			/* length of name in bytes */
};
2769 
/*
 * Attach @path as the ref's full path and derive the basename pointer and
 * its length from it. The ref does not copy the path, it points into it.
 */
static void set_ref_path(struct recorded_ref *ref, struct fs_path *path)
{
	ref->full_path = path;
	ref->name = (char *)kbasename(ref->full_path->start);
	ref->name_len = ref->full_path->end - ref->name;
}
2776 
2777 /*
2778  * We need to process new refs before deleted refs, but compare_tree gives us
2779  * everything mixed. So we first record all refs and later process them.
2780  * This function is a helper to record one ref.
2781  */
static int __record_ref(struct list_head *head, u64 dir,
		      u64 dir_gen, struct fs_path *path)
{
	struct recorded_ref *ref;

	ref = kmalloc(sizeof(*ref), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;

	ref->dir = dir;
	ref->dir_gen = dir_gen;
	/* The ref takes ownership of @path; freed in __free_recorded_refs(). */
	set_ref_path(ref, path);
	list_add_tail(&ref->list, head);
	return 0;
}
2797 
2798 static int dup_ref(struct recorded_ref *ref, struct list_head *list)
2799 {
2800         struct recorded_ref *new;
2801 
2802         new = kmalloc(sizeof(*ref), GFP_KERNEL);
2803         if (!new)
2804                 return -ENOMEM;
2805 
2806         new->dir = ref->dir;
2807         new->dir_gen = ref->dir_gen;
2808         new->full_path = NULL;
2809         INIT_LIST_HEAD(&new->list);
2810         list_add_tail(&new->list, list);
2811         return 0;
2812 }
2813 
2814 static void __free_recorded_refs(struct list_head *head)
2815 {
2816         struct recorded_ref *cur;
2817 
2818         while (!list_empty(head)) {
2819                 cur = list_entry(head->next, struct recorded_ref, list);
2820                 fs_path_free(cur->full_path);
2821                 list_del(&cur->list);
2822                 kfree(cur);
2823         }
2824 }
2825 
/* Free all refs recorded for the current inode (both new and deleted). */
static void free_recorded_refs(struct send_ctx *sctx)
{
	__free_recorded_refs(&sctx->new_refs);
	__free_recorded_refs(&sctx->deleted_refs);
}
2831 
2832 /*
2833  * Renames/moves a file/dir to its orphan name. Used when the first
2834  * ref of an unprocessed inode gets overwritten and for all non empty
2835  * directories.
2836  */
static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
			  struct fs_path *path)
{
	int ret;
	struct fs_path *orphan;

	orphan = fs_path_alloc();
	if (!orphan)
		return -ENOMEM;

	/* Build the unique orphan name for this inode/generation pair. */
	ret = gen_unique_name(sctx, ino, gen, orphan);
	if (ret < 0)
		goto out;

	/* Move the inode from its current path to the orphan name. */
	ret = send_rename(sctx, path, orphan);

out:
	fs_path_free(orphan);
	return ret;
}
2857 
/*
 * Insert an orphan_dir_info for @dir_ino into sctx->orphan_dirs, or return
 * the existing entry if the directory is already tracked. New entries are
 * initialized with gen 0 and last_dir_index_offset 0; callers fill them in.
 *
 * Returns the entry or ERR_PTR(-ENOMEM).
 */
static struct orphan_dir_info *
add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
{
	struct rb_node **p = &sctx->orphan_dirs.rb_node;
	struct rb_node *parent = NULL;
	struct orphan_dir_info *entry, *odi;

	/* Standard rb-tree descent keyed by inode number. */
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct orphan_dir_info, node);
		if (dir_ino < entry->ino) {
			p = &(*p)->rb_left;
		} else if (dir_ino > entry->ino) {
			p = &(*p)->rb_right;
		} else {
			return entry;
		}
	}

	odi = kmalloc(sizeof(*odi), GFP_KERNEL);
	if (!odi)
		return ERR_PTR(-ENOMEM);
	odi->ino = dir_ino;
	odi->gen = 0;
	odi->last_dir_index_offset = 0;

	rb_link_node(&odi->node, parent, p);
	rb_insert_color(&odi->node, &sctx->orphan_dirs);
	return odi;
}
2888 
2889 static struct orphan_dir_info *
2890 get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2891 {
2892         struct rb_node *n = sctx->orphan_dirs.rb_node;
2893         struct orphan_dir_info *entry;
2894 
2895         while (n) {
2896                 entry = rb_entry(n, struct orphan_dir_info, node);
2897                 if (dir_ino < entry->ino)
2898                         n = n->rb_left;
2899                 else if (dir_ino > entry->ino)
2900                         n = n->rb_right;
2901                 else
2902                         return entry;
2903         }
2904         return NULL;
2905 }
2906 
2907 static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino)
2908 {
2909         struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino);
2910 
2911         return odi != NULL;
2912 }
2913 
/* Unlink @odi from sctx->orphan_dirs and free it; NULL is a no-op. */
static void free_orphan_dir_info(struct send_ctx *sctx,
				 struct orphan_dir_info *odi)
{
	if (!odi)
		return;
	rb_erase(&odi->node, &sctx->orphan_dirs);
	kfree(odi);
}
2922 
2923 /*
2924  * Returns 1 if a directory can be removed at this point in time.
2925  * We check this by iterating all dir items and checking if the inode behind
2926  * the dir item was already processed.
2927  */
static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
		     u64 send_progress)
{
	int ret = 0;
	struct btrfs_root *root = sctx->parent_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_key loc;
	struct btrfs_dir_item *di;
	struct orphan_dir_info *odi = NULL;

	/*
	 * Don't try to rmdir the top/root subvolume dir.
	 */
	if (dir == BTRFS_FIRST_FREE_OBJECTID)
		return 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = dir;
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = 0;

	/*
	 * If a previous attempt already found a blocking entry, resume
	 * iteration at the dir index offset where it stopped.
	 */
	odi = get_orphan_dir_info(sctx, dir);
	if (odi)
		key.offset = odi->last_dir_index_offset;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		struct waiting_dir_move *dm;

		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			else if (ret > 0)
				break;
			continue;
		}
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		if (found_key.objectid != key.objectid ||
		    found_key.type != key.type)
			break;

		di = btrfs_item_ptr(path->nodes[0], path->slots[0],
				struct btrfs_dir_item);
		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);

		/*
		 * A child waiting for a dir move blocks the rmdir: record
		 * the deferral and let the move trigger the rmdir later.
		 */
		dm = get_waiting_dir_move(sctx, loc.objectid);
		if (dm) {
			odi = add_orphan_dir_info(sctx, dir);
			if (IS_ERR(odi)) {
				ret = PTR_ERR(odi);
				goto out;
			}
			odi->gen = dir_gen;
			odi->last_dir_index_offset = found_key.offset;
			dm->rmdir_ino = dir;
			ret = 0;
			goto out;
		}

		/* A child not yet processed also blocks the rmdir. */
		if (loc.objectid > send_progress) {
			odi = add_orphan_dir_info(sctx, dir);
			if (IS_ERR(odi)) {
				ret = PTR_ERR(odi);
				goto out;
			}
			odi->gen = dir_gen;
			odi->last_dir_index_offset = found_key.offset;
			ret = 0;
			goto out;
		}

		path->slots[0]++;
	}
	/* No blocking entry left: the deferral state is no longer needed. */
	free_orphan_dir_info(sctx, odi);

	ret = 1;

out:
	btrfs_free_path(path);
	return ret;
}
3019 
3020 static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
3021 {
3022         struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino);
3023 
3024         return entry != NULL;
3025 }
3026 
/*
 * Record that inode @ino has a deferred rename/move by inserting a
 * waiting_dir_move entry into sctx->waiting_dir_moves. @orphanized tells
 * whether the inode currently lives under its orphan name.
 *
 * Returns 0 on success, -EEXIST if @ino is already tracked, or -ENOMEM.
 */
static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
{
	struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
	struct rb_node *parent = NULL;
	struct waiting_dir_move *entry, *dm;

	dm = kmalloc(sizeof(*dm), GFP_KERNEL);
	if (!dm)
		return -ENOMEM;
	dm->ino = ino;
	dm->rmdir_ino = 0;
	dm->orphanized = orphanized;

	/* Standard rb-tree descent keyed by inode number. */
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct waiting_dir_move, node);
		if (ino < entry->ino) {
			p = &(*p)->rb_left;
		} else if (ino > entry->ino) {
			p = &(*p)->rb_right;
		} else {
			kfree(dm);
			return -EEXIST;
		}
	}

	rb_link_node(&dm->node, parent, p);
	rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
	return 0;
}
3057 
3058 static struct waiting_dir_move *
3059 get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
3060 {
3061         struct rb_node *n = sctx->waiting_dir_moves.rb_node;
3062         struct waiting_dir_move *entry;
3063 
3064         while (n) {
3065                 entry = rb_entry(n, struct waiting_dir_move, node);
3066                 if (ino < entry->ino)
3067                         n = n->rb_left;
3068                 else if (ino > entry->ino)
3069                         n = n->rb_right;
3070                 else
3071                         return entry;
3072         }
3073         return NULL;
3074 }
3075 
/* Unlink @dm from sctx->waiting_dir_moves and free it; NULL is a no-op. */
static void free_waiting_dir_move(struct send_ctx *sctx,
				  struct waiting_dir_move *dm)
{
	if (!dm)
		return;
	rb_erase(&dm->node, &sctx->waiting_dir_moves);
	kfree(dm);
}
3084 
/*
 * Queue a rename/move of inode @ino that must be delayed until inode
 * @parent_ino has been processed. Pending moves are kept in an rb-tree
 * keyed by parent_ino; moves sharing the same parent are chained on the
 * existing entry's list. The refs in @new_refs and @deleted_refs are
 * duplicated into pm->update_refs so the affected parent directories'
 * utimes can be updated when the move is applied (see apply_dir_move()).
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int add_pending_dir_move(struct send_ctx *sctx,
				u64 ino,
				u64 ino_gen,
				u64 parent_ino,
				struct list_head *new_refs,
				struct list_head *deleted_refs,
				const bool is_orphan)
{
	struct rb_node **p = &sctx->pending_dir_moves.rb_node;
	struct rb_node *parent = NULL;
	struct pending_dir_move *entry = NULL, *pm;
	struct recorded_ref *cur;
	int exists = 0;
	int ret;

	pm = kmalloc(sizeof(*pm), GFP_KERNEL);
	if (!pm)
		return -ENOMEM;
	pm->parent_ino = parent_ino;
	pm->ino = ino;
	pm->gen = ino_gen;
	INIT_LIST_HEAD(&pm->list);
	INIT_LIST_HEAD(&pm->update_refs);
	RB_CLEAR_NODE(&pm->node);

	/* Find the insertion point, or an existing entry for this parent. */
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct pending_dir_move, node);
		if (parent_ino < entry->parent_ino) {
			p = &(*p)->rb_left;
		} else if (parent_ino > entry->parent_ino) {
			p = &(*p)->rb_right;
		} else {
			exists = 1;
			break;
		}
	}

	/* Duplicate all refs whose parent dirs need utimes updates later. */
	list_for_each_entry(cur, deleted_refs, list) {
		ret = dup_ref(cur, &pm->update_refs);
		if (ret < 0)
			goto out;
	}
	list_for_each_entry(cur, new_refs, list) {
		ret = dup_ref(cur, &pm->update_refs);
		if (ret < 0)
			goto out;
	}

	ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
	if (ret)
		goto out;

	if (exists) {
		/* Chain onto the entry already present for this parent. */
		list_add_tail(&pm->list, &entry->list);
	} else {
		rb_link_node(&pm->node, parent, p);
		rb_insert_color(&pm->node, &sctx->pending_dir_moves);
	}
	ret = 0;
out:
	/* On failure, release the partially built pm and its dup'ed refs. */
	if (ret) {
		__free_recorded_refs(&pm->update_refs);
		kfree(pm);
	}
	return ret;
}
3152 
3153 static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
3154                                                       u64 parent_ino)
3155 {
3156         struct rb_node *n = sctx->pending_dir_moves.rb_node;
3157         struct pending_dir_move *entry;
3158 
3159         while (n) {
3160                 entry = rb_entry(n, struct pending_dir_move, node);
3161                 if (parent_ino < entry->parent_ino)
3162                         n = n->rb_left;
3163                 else if (parent_ino > entry->parent_ino)
3164                         n = n->rb_right;
3165                 else
3166                         return entry;
3167         }
3168         return NULL;
3169 }
3170 
/*
 * Walk up the ancestor chain of @ino and check whether it loops back to
 * @ino itself (possible while dir moves are pending).
 *
 * Returns 1 if a loop was detected — *ancestor_ino is then set to the
 * first ancestor in the chain that is waiting for a move, or to the inode
 * that closes the loop — 0 if there is no loop, or a negative errno.
 * @name is used as scratch space for path lookups.
 */
static int path_loop(struct send_ctx *sctx, struct fs_path *name,
		     u64 ino, u64 gen, u64 *ancestor_ino)
{
	int ret = 0;
	u64 parent_inode = 0;
	u64 parent_gen = 0;
	u64 start_ino = ino;

	*ancestor_ino = 0;
	while (ino != BTRFS_FIRST_FREE_OBJECTID) {
		fs_path_reset(name);

		/* A dir pending removal terminates the walk without a loop. */
		if (is_waiting_for_rm(sctx, ino))
			break;
		if (is_waiting_for_move(sctx, ino)) {
			if (*ancestor_ino == 0)
				*ancestor_ino = ino;
			/* Still at its old location: use the parent root. */
			ret = get_first_ref(sctx->parent_root, ino,
					    &parent_inode, &parent_gen, name);
		} else {
			ret = __get_cur_name_and_parent(sctx, ino, gen,
							&parent_inode,
							&parent_gen, name);
			if (ret > 0) {
				ret = 0;
				break;
			}
		}
		if (ret < 0)
			break;
		/* Reached the starting inode again: the chain loops. */
		if (parent_inode == start_ino) {
			ret = 1;
			if (*ancestor_ino == 0)
				*ancestor_ino = ino;
			break;
		}
		ino = parent_inode;
		gen = parent_gen;
	}
	return ret;
}
3212 
/*
 * Apply a previously delayed rename/move of directory inode pm->ino:
 * rename it from its old location (its first ref in the parent root, or
 * its orphan name) to its current path, perform any rmdir that was
 * deferred on it, and update the utimes of all affected parent dirs.
 * send_progress is temporarily advanced past the current inode so path
 * resolution treats it as already processed, and restored on exit.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
{
	struct fs_path *from_path = NULL;
	struct fs_path *to_path = NULL;
	struct fs_path *name = NULL;
	u64 orig_progress = sctx->send_progress;
	struct recorded_ref *cur;
	u64 parent_ino, parent_gen;
	struct waiting_dir_move *dm = NULL;
	u64 rmdir_ino = 0;
	u64 ancestor;
	bool is_orphan;
	int ret;

	name = fs_path_alloc();
	from_path = fs_path_alloc();
	if (!name || !from_path) {
		ret = -ENOMEM;
		goto out;
	}

	/* The move is applied now, so drop the waiting state. */
	dm = get_waiting_dir_move(sctx, pm->ino);
	ASSERT(dm);
	rmdir_ino = dm->rmdir_ino;
	is_orphan = dm->orphanized;
	free_waiting_dir_move(sctx, dm);

	/*
	 * Build the source path: the orphan (unique) name, or the path of
	 * the inode's first ref in the parent snapshot.
	 */
	if (is_orphan) {
		ret = gen_unique_name(sctx, pm->ino,
				      pm->gen, from_path);
	} else {
		ret = get_first_ref(sctx->parent_root, pm->ino,
				    &parent_ino, &parent_gen, name);
		if (ret < 0)
			goto out;
		ret = get_cur_path(sctx, parent_ino, parent_gen,
				   from_path);
		if (ret < 0)
			goto out;
		ret = fs_path_add_path(from_path, name);
	}
	if (ret < 0)
		goto out;

	sctx->send_progress = sctx->cur_ino + 1;
	/*
	 * If applying the move now would create a path loop, re-queue it
	 * below the looping ancestor and try again later.
	 */
	ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
	if (ret < 0)
		goto out;
	if (ret) {
		LIST_HEAD(deleted_refs);
		ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
		ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
					   &pm->update_refs, &deleted_refs,
					   is_orphan);
		if (ret < 0)
			goto out;
		if (rmdir_ino) {
			/* Carry the deferred rmdir over to the new entry. */
			dm = get_waiting_dir_move(sctx, pm->ino);
			ASSERT(dm);
			dm->rmdir_ino = rmdir_ino;
		}
		goto out;
	}
	fs_path_reset(name);
	to_path = name;
	name = NULL;
	ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
	if (ret < 0)
		goto out;

	ret = send_rename(sctx, from_path, to_path);
	if (ret < 0)
		goto out;

	/*
	 * If a directory removal was deferred because of this move, check
	 * whether it can be performed now.
	 */
	if (rmdir_ino) {
		struct orphan_dir_info *odi;
		u64 gen;

		odi = get_orphan_dir_info(sctx, rmdir_ino);
		if (!odi) {
			/* already deleted */
			goto finish;
		}
		gen = odi->gen;

		ret = can_rmdir(sctx, rmdir_ino, gen, sctx->cur_ino);
		if (ret < 0)
			goto out;
		if (!ret)
			goto finish;

		name = fs_path_alloc();
		if (!name) {
			ret = -ENOMEM;
			goto out;
		}
		ret = get_cur_path(sctx, rmdir_ino, gen, name);
		if (ret < 0)
			goto out;
		ret = send_rmdir(sctx, name);
		if (ret < 0)
			goto out;
	}

finish:
	ret = send_utimes(sctx, pm->ino, pm->gen);
	if (ret < 0)
		goto out;

	/*
	 * After rename/move, need to update the utimes of both new parent(s)
	 * and old parent(s).
	 */
	list_for_each_entry(cur, &pm->update_refs, list) {
		/*
		 * The parent inode might have been deleted in the send snapshot
		 */
		ret = get_inode_info(sctx->send_root, cur->dir, NULL,
				     NULL, NULL, NULL, NULL, NULL);
		if (ret == -ENOENT) {
			ret = 0;
			continue;
		}
		if (ret < 0)
			goto out;

		ret = send_utimes(sctx, cur->dir, cur->dir_gen);
		if (ret < 0)
			goto out;
	}

out:
	fs_path_free(name);
	fs_path_free(from_path);
	fs_path_free(to_path);
	sctx->send_progress = orig_progress;

	return ret;
}
3352 
/*
 * Release a pending dir move: unlink it from its sibling list and/or the
 * rb-tree (whichever it is on) and free its duplicated refs.
 */
static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
{
	if (!list_empty(&m->list))
		list_del(&m->list);
	if (!RB_EMPTY_NODE(&m->node))
		rb_erase(&m->node, &sctx->pending_dir_moves);
	__free_recorded_refs(&m->update_refs);
	kfree(m);
}
3362 
/*
 * Append @moves and every other pending move chained on its list to the
 * tail of @stack, preserving their order (@moves first), and remove the
 * entry from the rb-tree of pending dir moves.
 */
static void tail_append_pending_moves(struct send_ctx *sctx,
				      struct pending_dir_move *moves,
				      struct list_head *stack)
{
	if (list_empty(&moves->list)) {
		list_add_tail(&moves->list, stack);
	} else {
		/* Detach the siblings first so @moves lands before them. */
		LIST_HEAD(list);
		list_splice_init(&moves->list, &list);
		list_add_tail(&moves->list, stack);
		list_splice_tail(&list, stack);
	}
	if (!RB_EMPTY_NODE(&moves->node)) {
		rb_erase(&moves->node, &sctx->pending_dir_moves);
		RB_CLEAR_NODE(&moves->node);
	}
}
3380 
3381 static int apply_children_dir_moves(struct send_ctx *sctx)
3382 {
3383         struct pending_dir_move *pm;
3384         struct list_head stack;
3385         u64 parent_ino = sctx->cur_ino;
3386         int ret = 0;
3387 
3388         pm = get_pending_dir_moves(sctx, parent_ino);
3389         if (!pm)
3390                 return 0;
3391 
3392         INIT_LIST_HEAD(&stack);
3393         tail_append_pending_moves(sctx, pm, &stack);
3394 
3395         while (!list_empty(&stack)) {
3396                 pm = list_first_entry(&stack, struct pending_dir_move, list);
3397                 parent_ino = pm->ino;
3398                 ret = apply_dir_move(sctx, pm);
3399                 free_pending_move(sctx, pm);
3400                 if (ret)
3401                         goto out;
3402                 pm = get_pending_dir_moves(sctx, parent_ino);
3403                 if (pm)
3404                         tail_append_pending_moves(sctx, pm, &stack);
3405         }
3406         return 0;
3407 
3408 out:
3409         while (!list_empty(&stack)) {
3410                 pm = list_first_entry(&stack, struct pending_dir_move, list);
3411                 free_pending_move(sctx, pm);
3412         }
3413         return ret;
3414 }
3415 
3416 /*
3417  * We might need to delay a directory rename even when no ancestor directory
3418  * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
3419  * renamed. This happens when we rename a directory to the old name (the name
3420  * in the parent root) of some other unrelated directory that got its rename
3421  * delayed due to some ancestor with higher number that got renamed.
3422  *
3423  * Example:
3424  *
3425  * Parent snapshot:
3426  * .                                       (ino 256)
3427  * |---- a/                                (ino 257)
3428  * |     |---- file                        (ino 260)
3429  * |
3430  * |---- b/                                (ino 258)
3431  * |---- c/                                (ino 259)
3432  *
3433  * Send snapshot:
3434  * .                                       (ino 256)
3435  * |---- a/                                (ino 258)
3436  * |---- x/                                (ino 259)
3437  *       |---- y/                          (ino 257)
3438  *             |----- file                 (ino 260)
3439  *
 * Here we cannot rename 258 from 'b' to 'a' without the rename of inode 257
3441  * from 'a' to 'x/y' happening first, which in turn depends on the rename of
3442  * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
3443  * must issue is:
3444  *
3445  * 1 - rename 259 from 'c' to 'x'
3446  * 2 - rename 257 from 'a' to 'x/y'
3447  * 3 - rename 258 from 'b' to 'a'
3448  *
3449  * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
3450  * be done right away and < 0 on error.
3451  */
static int wait_for_dest_dir_move(struct send_ctx *sctx,
				  struct recorded_ref *parent_ref,
				  const bool is_orphan)
{
	struct btrfs_fs_info *fs_info = sctx->parent_root->fs_info;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key di_key;
	struct btrfs_dir_item *di;
	u64 left_gen;
	u64 right_gen;
	int ret = 0;
	struct waiting_dir_move *wdm;

	/* Fast path: no directory renames are currently delayed. */
	if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
		return 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	/*
	 * Look, in the parent snapshot, for a dir item in the target parent
	 * directory whose name matches the name sctx->cur_ino is being
	 * renamed to.
	 */
	key.objectid = parent_ref->dir;
	key.type = BTRFS_DIR_ITEM_KEY;
	key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);

	ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
	if (ret < 0) {
		goto out;
	} else if (ret > 0) {
		/* Name not used in the parent snapshot, nothing to wait for. */
		ret = 0;
		goto out;
	}

	di = btrfs_match_dir_item_name(fs_info, path, parent_ref->name,
				       parent_ref->name_len);
	if (!di) {
		/* Only a name-hash collision, not actually the same name. */
		ret = 0;
		goto out;
	}
	/*
	 * di_key.objectid has the number of the inode that has a dentry in the
	 * parent directory with the same name that sctx->cur_ino is being
	 * renamed to. We need to check if that inode is in the send root as
	 * well and if it is currently marked as an inode with a pending rename,
	 * if it is, we need to delay the rename of sctx->cur_ino as well, so
	 * that it happens after that other inode is renamed.
	 */
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
	if (di_key.type != BTRFS_INODE_ITEM_KEY) {
		ret = 0;
		goto out;
	}

	ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
			     &left_gen, NULL, NULL, NULL, NULL);
	if (ret < 0)
		goto out;
	ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
			     &right_gen, NULL, NULL, NULL, NULL);
	if (ret < 0) {
		/* Inode gone from the send snapshot, no rename can be pending. */
		if (ret == -ENOENT)
			ret = 0;
		goto out;
	}

	/* Different inode, no need to delay the rename of sctx->cur_ino */
	if (right_gen != left_gen) {
		ret = 0;
		goto out;
	}

	wdm = get_waiting_dir_move(sctx, di_key.objectid);
	if (wdm && !wdm->orphanized) {
		/*
		 * The colliding inode has a pending, non-orphanized rename,
		 * so queue our own rename to happen after it. Return 1 to
		 * tell the caller the rename was delayed.
		 */
		ret = add_pending_dir_move(sctx,
					   sctx->cur_ino,
					   sctx->cur_inode_gen,
					   di_key.objectid,
					   &sctx->new_refs,
					   &sctx->deleted_refs,
					   is_orphan);
		if (!ret)
			ret = 1;
	}
out:
	btrfs_free_path(path);
	return ret;
}
3539 
3540 /*
3541  * Check if inode ino2, or any of its ancestors, is inode ino1.
3542  * Return 1 if true, 0 if false and < 0 on error.
3543  */
3544 static int check_ino_in_path(struct btrfs_root *root,
3545                              const u64 ino1,
3546                              const u64 ino1_gen,
3547                              const u64 ino2,
3548                              const u64 ino2_gen,
3549                              struct fs_path *fs_path)
3550 {
3551         u64 ino = ino2;
3552 
3553         if (ino1 == ino2)
3554                 return ino1_gen == ino2_gen;
3555 
3556         while (ino > BTRFS_FIRST_FREE_OBJECTID) {
3557                 u64 parent;
3558                 u64 parent_gen;
3559                 int ret;
3560 
3561                 fs_path_reset(fs_path);
3562                 ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
3563                 if (ret < 0)
3564                         return ret;
3565                 if (parent == ino1)
3566                         return parent_gen == ino1_gen;
3567                 ino = parent;
3568         }
3569         return 0;
3570 }
3571 
3572 /*
3573  * Check if ino ino1 is an ancestor of inode ino2 in the given root for any
3574  * possible path (in case ino2 is not a directory and has multiple hard links).
3575  * Return 1 if true, 0 if false and < 0 on error.
3576  */
static int is_ancestor(struct btrfs_root *root,
		       const u64 ino1,
		       const u64 ino1_gen,
		       const u64 ino2,
		       struct fs_path *fs_path)
{
	bool free_fs_path = false;
	int ret = 0;
	struct btrfs_path *path = NULL;
	struct btrfs_key key;

	/* Callers may pass a scratch fs_path; allocate one if they didn't. */
	if (!fs_path) {
		fs_path = fs_path_alloc();
		if (!fs_path)
			return -ENOMEM;
		free_fs_path = true;
	}

	path = alloc_path_for_send();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* Position at the first ref item (lowest possible key) of ino2. */
	key.objectid = ino2;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	/*
	 * Iterate over every INODE_REF/INODE_EXTREF item of ino2 (one per
	 * hard-link parent) and, for each parent directory found, check
	 * whether ino1 lies on the path from that parent up to the root.
	 */
	while (true) {
		struct extent_buffer *leaf = path->nodes[0];
		int slot = path->slots[0];
		u32 cur_offset = 0;
		u32 item_size;

		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid != ino2)
			break;
		if (key.type != BTRFS_INODE_REF_KEY &&
		    key.type != BTRFS_INODE_EXTREF_KEY)
			break;

		item_size = btrfs_item_size_nr(leaf, slot);
		while (cur_offset < item_size) {
			u64 parent;
			u64 parent_gen;

			if (key.type == BTRFS_INODE_EXTREF_KEY) {
				unsigned long ptr;
				struct btrfs_inode_extref *extref;

				/*
				 * Extref items pack multiple entries; each
				 * embeds its parent dir, advance past the
				 * header and the inline name.
				 */
				ptr = btrfs_item_ptr_offset(leaf, slot);
				extref = (struct btrfs_inode_extref *)
					(ptr + cur_offset);
				parent = btrfs_inode_extref_parent(leaf,
								   extref);
				cur_offset += sizeof(*extref);
				cur_offset += btrfs_inode_extref_name_len(leaf,
								  extref);
			} else {
				/*
				 * Regular refs share one parent (the key
				 * offset), so one pass covers the item.
				 */
				parent = key.offset;
				cur_offset = item_size;
			}

			ret = get_inode_info(root, parent, NULL, &parent_gen,
					     NULL, NULL, NULL, NULL);
			if (ret < 0)
				goto out;
			/* 1 = ancestor found, 0 = not on this path, <0 = error. */
			ret = check_ino_in_path(root, ino1, ino1_gen,
						parent, parent_gen, fs_path);
			if (ret)
				goto out;
		}
		path->slots[0]++;
	}
	ret = 0;
 out:
	btrfs_free_path(path);
	if (free_fs_path)
		fs_path_free(fs_path);
	return ret;
}
3671 
/*
 * Decide whether the rename/move of sctx->cur_ino must be delayed until one
 * of its ancestors is processed. Returns 1 if delayed (a pending dir move was
 * queued), 0 if it can be done right away, and < 0 on error.
 */
static int wait_for_parent_move(struct send_ctx *sctx,
				struct recorded_ref *parent_ref,
				const bool is_orphan)
{
	int ret = 0;
	u64 ino = parent_ref->dir;
	u64 ino_gen = parent_ref->dir_gen;
	u64 parent_ino_before, parent_ino_after;
	struct fs_path *path_before = NULL;
	struct fs_path *path_after = NULL;
	int len1, len2;

	path_after = fs_path_alloc();
	path_before = fs_path_alloc();
	if (!path_after || !path_before) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Our current directory inode may not yet be renamed/moved because some
	 * ancestor (immediate or not) has to be renamed/moved first. So find if
	 * such ancestor exists and make sure our own rename/move happens after
	 * that ancestor is processed to avoid path build infinite loops (done
	 * at get_cur_path()).
	 */
	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
		u64 parent_ino_after_gen;

		if (is_waiting_for_move(sctx, ino)) {
			/*
			 * If the current inode is an ancestor of ino in the
			 * parent root, we need to delay the rename of the
			 * current inode, otherwise don't delay the rename
			 * because we can end up with a circular dependency
			 * of renames, resulting in some directories never
			 * getting the respective rename operations issued in
			 * the send stream or getting into infinite path build
			 * loops.
			 */
			ret = is_ancestor(sctx->parent_root,
					  sctx->cur_ino, sctx->cur_inode_gen,
					  ino, path_before);
			if (ret)
				break;
		}

		fs_path_reset(path_before);
		fs_path_reset(path_after);

		/* Get ino's parent and first-ref path in both snapshots. */
		ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
				    &parent_ino_after_gen, path_after);
		if (ret < 0)
			goto out;
		ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
				    NULL, path_before);
		if (ret < 0 && ret != -ENOENT) {
			goto out;
		} else if (ret == -ENOENT) {
			/* ino does not exist in the parent snapshot, stop. */
			ret = 0;
			break;
		}

		len1 = fs_path_len(path_before);
		len2 = fs_path_len(path_after);
		if (ino > sctx->cur_ino &&
		    (parent_ino_before != parent_ino_after || len1 != len2 ||
		     memcmp(path_before->start, path_after->start, len1))) {
			u64 parent_ino_gen;

			/*
			 * An ancestor with a higher inode number than ours was
			 * renamed/moved between the snapshots; if it is really
			 * the same inode (generations match) we must wait for
			 * it to be processed first.
			 */
			ret = get_inode_info(sctx->parent_root, ino, NULL,
					     &parent_ino_gen, NULL, NULL, NULL,
					     NULL);
			if (ret < 0)
				goto out;
			if (ino_gen == parent_ino_gen) {
				ret = 1;
				break;
			}
		}
		/* Keep walking up towards the subvolume root. */
		ino = parent_ino_after;
		ino_gen = parent_ino_after_gen;
	}

out:
	fs_path_free(path_before);
	fs_path_free(path_after);

	if (ret == 1) {
		/* Queue our rename to happen after inode 'ino' is processed. */
		ret = add_pending_dir_move(sctx,
					   sctx->cur_ino,
					   sctx->cur_inode_gen,
					   ino,
					   &sctx->new_refs,
					   &sctx->deleted_refs,
					   is_orphan);
		if (!ret)
			ret = 1;
	}

	return ret;
}
3774 
3775 static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
3776 {
3777         int ret;
3778         struct fs_path *new_path;
3779 
3780         /*
3781          * Our reference's name member points to its full_path member string, so
3782          * we use here a new path.
3783          */
3784         new_path = fs_path_alloc();
3785         if (!new_path)
3786                 return -ENOMEM;
3787 
3788         ret = get_cur_path(sctx, ref->dir, ref->dir_gen, new_path);
3789         if (ret < 0) {
3790                 fs_path_free(new_path);
3791                 return ret;
3792         }
3793         ret = fs_path_add(new_path, ref->name, ref->name_len);
3794         if (ret < 0) {
3795                 fs_path_free(new_path);
3796                 return ret;
3797         }
3798 
3799         fs_path_free(ref->full_path);
3800         set_ref_path(ref, new_path);
3801 
3802         return 0;
3803 }
3804 
3805 /*
3806  * This does all the move/link/unlink/rmdir magic.
3807  */
3808 static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
3809 {
3810         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
3811         int ret = 0;
3812         struct recorded_ref *cur;
3813         struct recorded_ref *cur2;
3814         struct list_head check_dirs;
3815         struct fs_path *valid_path = NULL;
3816         u64 ow_inode = 0;
3817         u64 ow_gen;
3818         u64 ow_mode;
3819         int did_overwrite = 0;
3820         int is_orphan = 0;
3821         u64 last_dir_ino_rm = 0;
3822         bool can_rename = true;
3823         bool orphanized_dir = false;
3824         bool orphanized_ancestor = false;
3825 
3826         btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino);
3827 
3828         /*
3829          * This should never happen as the root dir always has the same ref
3830          * which is always '..'
3831          */
3832         BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
3833         INIT_LIST_HEAD(&check_dirs);
3834 
3835         valid_path = fs_path_alloc();
3836         if (!valid_path) {
3837                 ret = -ENOMEM;
3838                 goto out;
3839         }
3840 
3841         /*
3842          * First, check if the first ref of the current inode was overwritten
3843          * before. If yes, we know that the current inode was already orphanized
3844          * and thus use the orphan name. If not, we can use get_cur_path to
3845          * get the path of the first ref as it would like while receiving at
3846          * this point in time.
3847          * New inodes are always orphan at the beginning, so force to use the
3848          * orphan name in this case.
3849          * The first ref is stored in valid_path and will be updated if it
3850          * gets moved around.
3851          */
3852         if (!sctx->cur_inode_new) {
3853                 ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
3854                                 sctx->cur_inode_gen);
3855                 if (ret < 0)
3856                         goto out;
3857                 if (ret)
3858                         did_overwrite = 1;
3859         }
3860         if (sctx->cur_inode_new || did_overwrite) {
3861                 ret = gen_unique_name(sctx, sctx->cur_ino,
3862                                 sctx->cur_inode_gen, valid_path);
3863                 if (ret < 0)
3864                         goto out;
3865                 is_orphan = 1;
3866         } else {
3867                 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3868                                 valid_path);
3869                 if (ret < 0)
3870                         goto out;
3871         }
3872 
3873         list_for_each_entry(cur, &sctx->new_refs, list) {
3874                 /*
3875                  * We may have refs where the parent directory does not exist
3876                  * yet. This happens if the parent directories inum is higher
3877                  * than the current inum. To handle this case, we create the
3878                  * parent directory out of order. But we need to check if this
3879                  * did already happen before due to other refs in the same dir.
3880                  */
3881                 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
3882                 if (ret < 0)
3883                         goto out;
3884                 if (ret == inode_state_will_create) {
3885                         ret = 0;
3886                         /*
3887                          * First check if any of the current inodes refs did
3888                          * already create the dir.
3889                          */
3890                         list_for_each_entry(cur2, &sctx->new_refs, list) {
3891                                 if (cur == cur2)
3892                                         break;
3893                                 if (cur2->dir == cur->dir) {
3894                                         ret = 1;
3895                                         break;
3896                                 }
3897                         }
3898 
3899                         /*
3900                          * If that did not happen, check if a previous inode
3901                          * did already create the dir.
3902                          */
3903                         if (!ret)
3904                                 ret = did_create_dir(sctx, cur->dir);
3905                         if (ret < 0)
3906                                 goto out;
3907                         if (!ret) {
3908                                 ret = send_create_inode(sctx, cur->dir);
3909                                 if (ret < 0)
3910                                         goto out;
3911                         }
3912                 }
3913 
3914                 /*
3915                  * Check if this new ref would overwrite the first ref of
3916                  * another unprocessed inode. If yes, orphanize the
3917                  * overwritten inode. If we find an overwritten ref that is
3918                  * not the first ref, simply unlink it.
3919                  */
3920                 ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
3921                                 cur->name, cur->name_len,
3922                                 &ow_inode, &ow_gen, &ow_mode);
3923                 if (ret < 0)
3924                         goto out;
3925                 if (ret) {
3926                         ret = is_first_ref(sctx->parent_root,
3927                                            ow_inode, cur->dir, cur->name,
3928                                            cur->name_len);
3929                         if (ret < 0)
3930                                 goto out;
3931                         if (ret) {
3932                                 struct name_cache_entry *nce;
3933                                 struct waiting_dir_move *wdm;
3934 
3935                                 ret = orphanize_inode(sctx, ow_inode, ow_gen,
3936                                                 cur->full_path);
3937                                 if (ret < 0)
3938                                         goto out;
3939                                 if (S_ISDIR(ow_mode))
3940                                         orphanized_dir = true;
3941 
3942                                 /*
3943                                  * If ow_inode has its rename operation delayed
3944                                  * make sure that its orphanized name is used in
3945                                  * the source path when performing its rename
3946                                  * operation.
3947                                  */
3948                                 if (is_waiting_for_move(sctx, ow_inode)) {
3949                                         wdm = get_waiting_dir_move(sctx,
3950                                                                    ow_inode);
3951                                         ASSERT(wdm);
3952                                         wdm->orphanized = true;
3953                                 }
3954 
3955                                 /*
3956                                  * Make sure we clear our orphanized inode's
3957                                  * name from the name cache. This is because the
3958                                  * inode ow_inode might be an ancestor of some
3959                                  * other inode that will be orphanized as well
3960                                  * later and has an inode number greater than
3961                                  * sctx->send_progress. We need to prevent
3962                                  * future name lookups from using the old name
3963                                  * and get instead the orphan name.
3964                                  */
3965                                 nce = name_cache_search(sctx, ow_inode, ow_gen);
3966                                 if (nce) {
3967                                         name_cache_delete(sctx, nce);
3968                                         kfree(nce);
3969                                 }
3970 
3971                                 /*
3972                                  * ow_inode might currently be an ancestor of
3973                                  * cur_ino, therefore compute valid_path (the
3974                                  * current path of cur_ino) again because it
3975                                  * might contain the pre-orphanization name of
3976                                  * ow_inode, which is no longer valid.
3977                                  */
3978                                 ret = is_ancestor(sctx->parent_root,
3979                                                   ow_inode, ow_gen,
3980                                                   sctx->cur_ino, NULL);
3981                                 if (ret > 0) {
3982                                         orphanized_ancestor = true;
3983                                         fs_path_reset(valid_path);
3984                                         ret = get_cur_path(sctx, sctx->cur_ino,
3985                                                            sctx->cur_inode_gen,
3986                                                            valid_path);
3987                                 }
3988                                 if (ret < 0)
3989                                         goto out;
3990                         } else {
3991                                 ret = send_unlink(sctx, cur->full_path);
3992                                 if (ret < 0)
3993                                         goto out;
3994                         }
3995                 }
3996 
3997                 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
3998                         ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
3999                         if (ret < 0)
4000                                 goto out;
4001                         if (ret == 1) {
4002                                 can_rename = false;
4003                                 *pending_move = 1;
4004                         }
4005                 }
4006 
4007                 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
4008                     can_rename) {
4009                         ret = wait_for_parent_move(sctx, cur, is_orphan);
4010                         if (ret < 0)
4011                                 goto out;
4012                         if (ret == 1) {
4013                                 can_rename = false;
4014                                 *pending_move = 1;
4015                         }
4016                 }
4017 
4018                 /*
4019                  * link/move the ref to the new place. If we have an orphan
4020                  * inode, move it and update valid_path. If not, link or move
4021                  * it depending on the inode mode.
4022                  */
4023                 if (is_orphan && can_rename) {
4024                         ret = send_rename(sctx, valid_path, cur->full_path);
4025                         if (ret < 0)
4026                                 goto out;
4027                         is_orphan = 0;
4028                         ret = fs_path_copy(valid_path, cur->full_path);
4029                         if (ret < 0)
4030                                 goto out;
4031                 } else if (can_rename) {
4032                         if (S_ISDIR(sctx->cur_inode_mode)) {
4033                                 /*
4034                                  * Dirs can't be linked, so move it. For moved
4035                                  * dirs, we always have one new and one deleted
4036                                  * ref. The deleted ref is ignored later.
4037                                  */
4038                                 ret = send_rename(sctx, valid_path,
4039                                                   cur->full_path);
4040                                 if (!ret)
4041                                         ret = fs_path_copy(valid_path,
4042                                                            cur->full_path);
4043                                 if (ret < 0)
4044                                         goto out;
4045                         } else {
4046                                 /*
4047                                  * We might have previously orphanized an inode
4048                                  * which is an ancestor of our current inode,
4049                                  * so our reference's full path, which was
4050                                  * computed before any such orphanizations, must
4051                                  * be updated.
4052                                  */
4053                                 if (orphanized_dir) {
4054                                         ret = update_ref_path(sctx, cur);
4055                                         if (ret < 0)
4056                                                 goto out;
4057                                 }
4058                                 ret = send_link(sctx, cur->full_path,
4059                                                 valid_path);
4060                                 if (ret < 0)
4061                                         goto out;
4062                         }
4063                 }
4064                 ret = dup_ref(cur, &check_dirs);
4065                 if (ret < 0)
4066                         goto out;
4067         }
4068 
4069         if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
4070                 /*
4071                  * Check if we can already rmdir the directory. If not,
4072                  * orphanize it. For every dir item inside that gets deleted
4073                  * later, we do this check again and rmdir it then if possible.
4074                  * See the use of check_dirs for more details.
4075                  */
4076                 ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4077                                 sctx->cur_ino);
4078                 if (ret < 0)
4079                         goto out;
4080                 if (ret) {
4081                         ret = send_rmdir(sctx, valid_path);
4082                         if (ret < 0)
4083                                 goto out;
4084                 } else if (!is_orphan) {
4085                         ret = orphanize_inode(sctx, sctx->cur_ino,
4086                                         sctx->cur_inode_gen, valid_path);
4087                         if (ret < 0)
4088                                 goto out;
4089                         is_orphan = 1;
4090                 }
4091 
4092                 list_for_each_entry(cur, &sctx->deleted_refs, list) {
4093                         ret = dup_ref(cur, &check_dirs);
4094                         if (ret < 0)
4095                                 goto out;
4096                 }
4097         } else if (S_ISDIR(sctx->cur_inode_mode) &&
4098                    !list_empty(&sctx->deleted_refs)) {
4099                 /*
4100                  * We have a moved dir. Add the old parent to check_dirs
4101                  */
4102                 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
4103                                 list);
4104                 ret = dup_ref(cur, &check_dirs);
4105                 if (ret < 0)
4106                         goto out;
4107         } else if (!S_ISDIR(sctx->cur_inode_mode)) {
4108                 /*
4109                  * We have a non dir inode. Go through all deleted refs and
4110                  * unlink them if they were not already overwritten by other
4111                  * inodes.
4112                  */
4113                 list_for_each_entry(cur, &sctx->deleted_refs, list) {
4114                         ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
4115                                         sctx->cur_ino, sctx->cur_inode_gen,
4116                                         cur->name, cur->name_len);
4117                         if (ret < 0)
4118                                 goto out;
4119                         if (!ret) {
4120                                 /*
4121                                  * If we orphanized any ancestor before, we need
4122                                  * to recompute the full path for deleted names,
4123                                  * since any such path was computed before we
4124                                  * processed any references and orphanized any
4125                                  * ancestor inode.
4126                                  */
4127                                 if (orphanized_ancestor) {
4128                                         ret = update_ref_path(sctx, cur);
4129                                         if (ret < 0)
4130                                                 goto out;
4131                                 }
4132                                 ret = send_unlink(sctx, cur->full_path);
4133                                 if (ret < 0)
4134                                         goto out;
4135                         }
4136                         ret = dup_ref(cur, &check_dirs);
4137                         if (ret < 0)
4138                                 goto out;
4139                 }
4140                 /*
4141                  * If the inode is still orphan, unlink the orphan. This may
4142                  * happen when a previous inode did overwrite the first ref
4143                  * of this inode and no new refs were added for the current
4144                  * inode. Unlinking does not mean that the inode is deleted in
4145                  * all cases. There may still be links to this inode in other
4146                  * places.
4147                  */
4148                 if (is_orphan) {
4149                         ret = send_unlink(sctx, valid_path);
4150                         if (ret < 0)
4151                                 goto out;
4152                 }
4153         }
4154 
4155         /*
4156          * We did collect all parent dirs where cur_inode was once located. We
4157          * now go through all these dirs and check if they are pending for
4158          * deletion and if it's finally possible to perform the rmdir now.
4159          * We also update the inode stats of the parent dirs here.
4160          */
4161         list_for_each_entry(cur, &check_dirs, list) {
4162                 /*
4163                  * In case we had refs into dirs that were not processed yet,
4164                  * we don't need to do the utime and rmdir logic for these dirs.
4165                  * The dir will be processed later.
4166                  */
4167                 if (cur->dir > sctx->cur_ino)
4168                         continue;
4169 
4170                 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
4171                 if (ret < 0)
4172                         goto out;
4173 
4174                 if (ret == inode_state_did_create ||
4175                     ret == inode_state_no_change) {
4176                         /* TODO delayed utimes */
4177                         ret = send_utimes(sctx, cur->dir, cur->dir_gen);
4178                         if (ret < 0)
4179                                 goto out;
4180                 } else if (ret == inode_state_did_delete &&
4181                            cur->dir != last_dir_ino_rm) {
4182                         ret = can_rmdir(sctx, cur->dir, cur->dir_gen,
4183                                         sctx->cur_ino);
4184                         if (ret < 0)
4185                                 goto out;
4186                         if (ret) {
4187                                 ret = get_cur_path(sctx, cur->dir,
4188                                                    cur->dir_gen, valid_path);
4189                                 if (ret < 0)
4190                                         goto out;
4191                                 ret = send_rmdir(sctx, valid_path);
4192                                 if (ret < 0)
4193                                         goto out;
4194                                 last_dir_ino_rm = cur->dir;
4195                         }
4196                 }
4197         }
4198 
4199         ret = 0;
4200 
4201 out:
4202         __free_recorded_refs(&check_dirs);
4203         free_recorded_refs(sctx);
4204         fs_path_free(valid_path);
4205         return ret;
4206 }
4207 
4208 static int record_ref(struct btrfs_root *root, u64 dir, struct fs_path *name,
4209                       void *ctx, struct list_head *refs)
4210 {
4211         int ret = 0;
4212         struct send_ctx *sctx = ctx;
4213         struct fs_path *p;
4214         u64 gen;
4215 
4216         p = fs_path_alloc();
4217         if (!p)
4218                 return -ENOMEM;
4219 
4220         ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL,
4221                         NULL, NULL);
4222         if (ret < 0)
4223                 goto out;
4224 
4225         ret = get_cur_path(sctx, dir, gen, p);
4226         if (ret < 0)
4227                 goto out;
4228         ret = fs_path_add_path(p, name);
4229         if (ret < 0)
4230                 goto out;
4231 
4232         ret = __record_ref(refs, dir, gen, p);
4233 
4234 out:
4235         if (ret)
4236                 fs_path_free(p);
4237         return ret;
4238 }
4239 
4240 static int __record_new_ref(int num, u64 dir, int index,
4241                             struct fs_path *name,
4242                             void *ctx)
4243 {
4244         struct send_ctx *sctx = ctx;
4245         return record_ref(sctx->send_root, dir, name, ctx, &sctx->new_refs);
4246 }
4247 
4248 
4249 static int __record_deleted_ref(int num, u64 dir, int index,
4250                                 struct fs_path *name,
4251                                 void *ctx)
4252 {
4253         struct send_ctx *sctx = ctx;
4254         return record_ref(sctx->parent_root, dir, name, ctx,
4255                           &sctx->deleted_refs);
4256 }
4257 
4258 static int record_new_ref(struct send_ctx *sctx)
4259 {
4260         int ret;
4261 
4262         ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
4263                                 sctx->cmp_key, 0, __record_new_ref, sctx);
4264         if (ret < 0)
4265                 goto out;
4266         ret = 0;
4267 
4268 out:
4269         return ret;
4270 }
4271 
4272 static int record_deleted_ref(struct send_ctx *sctx)
4273 {
4274         int ret;
4275 
4276         ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
4277                                 sctx->cmp_key, 0, __record_deleted_ref, sctx);
4278         if (ret < 0)
4279                 goto out;
4280         ret = 0;
4281 
4282 out:
4283         return ret;
4284 }
4285 
/*
 * Search context for __find_iref()/find_iref(): look for a specific
 * (parent dir, generation, name) reference among an inode's ref items.
 */
struct find_ref_ctx {
        u64 dir;                        /* parent directory objectid to match */
        u64 dir_gen;                    /* expected generation of that directory */
        struct btrfs_root *root;        /* root used to look up dir's generation */
        struct fs_path *name;           /* name to match */
        int found_idx;                  /* index of matching ref, -1 if none */
};
4293 
4294 static int __find_iref(int num, u64 dir, int index,
4295                        struct fs_path *name,
4296                        void *ctx_)
4297 {
4298         struct find_ref_ctx *ctx = ctx_;
4299         u64 dir_gen;
4300         int ret;
4301 
4302         if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
4303             strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
4304                 /*
4305                  * To avoid doing extra lookups we'll only do this if everything
4306                  * else matches.
4307                  */
4308                 ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
4309                                      NULL, NULL, NULL);
4310                 if (ret)
4311                         return ret;
4312                 if (dir_gen != ctx->dir_gen)
4313                         return 0;
4314                 ctx->found_idx = num;
4315                 return 1;
4316         }
4317         return 0;
4318 }
4319 
4320 static int find_iref(struct btrfs_root *root,
4321                      struct btrfs_path *path,
4322                      struct btrfs_key *key,
4323                      u64 dir, u64 dir_gen, struct fs_path *name)
4324 {
4325         int ret;
4326         struct find_ref_ctx ctx;
4327 
4328         ctx.dir = dir;
4329         ctx.name = name;
4330         ctx.dir_gen = dir_gen;
4331         ctx.found_idx = -1;
4332         ctx.root = root;
4333 
4334         ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
4335         if (ret < 0)
4336                 return ret;
4337 
4338         if (ctx.found_idx == -1)
4339                 return -ENOENT;
4340 
4341         return ctx.found_idx;
4342 }
4343 
4344 static int __record_changed_new_ref(int num, u64 dir, int index,
4345                                     struct fs_path *name,
4346                                     void *ctx)
4347 {
4348         u64 dir_gen;
4349         int ret;
4350         struct send_ctx *sctx = ctx;
4351 
4352         ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
4353                              NULL, NULL, NULL);
4354         if (ret)
4355                 return ret;
4356 
4357         ret = find_iref(sctx->parent_root, sctx->right_path,
4358                         sctx->cmp_key, dir, dir_gen, name);
4359         if (ret == -ENOENT)
4360                 ret = __record_new_ref(num, dir, index, name, sctx);
4361         else if (ret > 0)
4362                 ret = 0;
4363 
4364         return ret;
4365 }
4366 
4367 static int __record_changed_deleted_ref(int num, u64 dir, int index,
4368                                         struct fs_path *name,
4369                                         void *ctx)
4370 {
4371         u64 dir_gen;
4372         int ret;
4373         struct send_ctx *sctx = ctx;
4374 
4375         ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
4376                              NULL, NULL, NULL);
4377         if (ret)
4378                 return ret;
4379 
4380         ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
4381                         dir, dir_gen, name);
4382         if (ret == -ENOENT)
4383                 ret = __record_deleted_ref(num, dir, index, name, sctx);
4384         else if (ret > 0)
4385                 ret = 0;
4386 
4387         return ret;
4388 }
4389 
4390 static int record_changed_ref(struct send_ctx *sctx)
4391 {
4392         int ret = 0;
4393 
4394         ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
4395                         sctx->cmp_key, 0, __record_changed_new_ref, sctx);
4396         if (ret < 0)
4397                 goto out;
4398         ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
4399                         sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
4400         if (ret < 0)
4401                 goto out;
4402         ret = 0;
4403 
4404 out:
4405         return ret;
4406 }
4407 
4408 /*
4409  * Record and process all refs at once. Needed when an inode changes the
4410  * generation number, which means that it was deleted and recreated.
4411  */
4412 static int process_all_refs(struct send_ctx *sctx,
4413                             enum btrfs_compare_tree_result cmd)
4414 {
4415         int ret;
4416         struct btrfs_root *root;
4417         struct btrfs_path *path;
4418         struct btrfs_key key;
4419         struct btrfs_key found_key;
4420         struct extent_buffer *eb;
4421         int slot;
4422         iterate_inode_ref_t cb;
4423         int pending_move = 0;
4424 
4425         path = alloc_path_for_send();
4426         if (!path)
4427                 return -ENOMEM;
4428 
4429         if (cmd == BTRFS_COMPARE_TREE_NEW) {
4430                 root = sctx->send_root;
4431                 cb = __record_new_ref;
4432         } else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
4433                 root = sctx->parent_root;
4434                 cb = __record_deleted_ref;
4435         } else {
4436                 btrfs_err(sctx->send_root->fs_info,
4437                                 "Wrong command %d in process_all_refs", cmd);
4438                 ret = -EINVAL;
4439                 goto out;
4440         }
4441 
4442         key.objectid = sctx->cmp_key->objectid;
4443         key.type = BTRFS_INODE_REF_KEY;
4444         key.offset = 0;
4445         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4446         if (ret < 0)
4447                 goto out;
4448 
4449         while (1) {
4450                 eb = path->nodes[0];
4451                 slot = path->slots[0];
4452                 if (slot >= btrfs_header_nritems(eb)) {
4453                         ret = btrfs_next_leaf(root, path);
4454                         if (ret < 0)
4455                                 goto out;
4456                         else if (ret > 0)
4457                                 break;
4458                         continue;
4459                 }
4460 
4461                 btrfs_item_key_to_cpu(eb, &found_key, slot);
4462 
4463                 if (found_key.objectid != key.objectid ||
4464                     (found_key.type != BTRFS_INODE_REF_KEY &&
4465                      found_key.type != BTRFS_INODE_EXTREF_KEY))
4466                         break;
4467 
4468                 ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
4469                 if (ret < 0)
4470                         goto out;
4471 
4472                 path->slots[0]++;
4473         }
4474         btrfs_release_path(path);
4475 
4476         /*
4477          * We don't actually care about pending_move as we are simply
4478          * re-creating this inode and will be rename'ing it into place once we
4479          * rename the parent directory.
4480          */
4481         ret = process_recorded_refs(sctx, &pending_move);
4482 out:
4483         btrfs_free_path(path);
4484         return ret;
4485 }
4486 
/*
 * Emit a SET_XATTR command for @path: set xattr @name (of @name_len bytes)
 * to the value @data (of @data_len bytes).
 *
 * Note: the TLV_PUT* macros jump to the tlv_put_failure label on error,
 * which is why the label must exist even though nothing jumps to it
 * explicitly here.
 */
static int send_set_xattr(struct send_ctx *sctx,
                          struct fs_path *path,
                          const char *name, int name_len,
                          const char *data, int data_len)
{
        int ret = 0;

        ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
        if (ret < 0)
                goto out;

        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
        TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
        TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);

        ret = send_cmd(sctx);

tlv_put_failure:
out:
        return ret;
}
4508 
/*
 * Emit a REMOVE_XATTR command for @path, removing xattr @name (of
 * @name_len bytes).
 *
 * Note: the TLV_PUT* macros jump to the tlv_put_failure label on error.
 */
static int send_remove_xattr(struct send_ctx *sctx,
                          struct fs_path *path,
                          const char *name, int name_len)
{
        int ret = 0;

        ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
        if (ret < 0)
                goto out;

        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
        TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);

        ret = send_cmd(sctx);

tlv_put_failure:
out:
        return ret;
}
4528 
4529 static int __process_new_xattr(int num, struct btrfs_key *di_key,
4530                                const char *name, int name_len,
4531                                const char *data, int data_len,
4532                                u8 type, void *ctx)
4533 {
4534         int ret;
4535         struct send_ctx *sctx = ctx;
4536         struct fs_path *p;
4537         struct posix_acl_xattr_header dummy_acl;
4538 
4539         p = fs_path_alloc();
4540         if (!p)
4541                 return -ENOMEM;
4542 
4543         /*
4544          * This hack is needed because empty acls are stored as zero byte
4545          * data in xattrs. Problem with that is, that receiving these zero byte
4546          * acls will fail later. To fix this, we send a dummy acl list that
4547          * only contains the version number and no entries.
4548          */
4549         if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
4550             !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
4551                 if (data_len == 0) {
4552                         dummy_acl.a_version =
4553                                         cpu_to_le32(POSIX_ACL_XATTR_VERSION);
4554                         data = (char *)&dummy_acl;
4555                         data_len = sizeof(dummy_acl);
4556                 }
4557         }
4558 
4559         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4560         if (ret < 0)
4561                 goto out;
4562 
4563         ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
4564 
4565 out:
4566         fs_path_free(p);
4567         return ret;
4568 }
4569 
4570 static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
4571                                    const char *name, int name_len,
4572                                    const char *data, int data_len,
4573                                    u8 type, void *ctx)
4574 {
4575         int ret;
4576         struct send_ctx *sctx = ctx;
4577         struct fs_path *p;
4578 
4579         p = fs_path_alloc();
4580         if (!p)
4581                 return -ENOMEM;
4582 
4583         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4584         if (ret < 0)
4585                 goto out;
4586 
4587         ret = send_remove_xattr(sctx, p, name, name_len);
4588 
4589 out:
4590         fs_path_free(p);
4591         return ret;
4592 }
4593 
4594 static int process_new_xattr(struct send_ctx *sctx)
4595 {
4596         int ret = 0;
4597 
4598         ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4599                                __process_new_xattr, sctx);
4600 
4601         return ret;
4602 }
4603 
4604 static int process_deleted_xattr(struct send_ctx *sctx)
4605 {
4606         return iterate_dir_item(sctx->parent_root, sctx->right_path,
4607                                 __process_deleted_xattr, sctx);
4608 }
4609 
/*
 * Search context for __find_xattr()/find_xattr(): look up an xattr by name
 * and keep a private copy of its value for the caller.
 */
struct find_xattr_ctx {
        const char *name;       /* xattr name to look for */
        int name_len;           /* length of that name in bytes */
        int found_idx;          /* index of matching xattr, -1 if none */
        char *found_data;       /* kmemdup'ed copy of the value (caller frees) */
        int found_data_len;     /* length of found_data in bytes */
};
4617 
4618 static int __find_xattr(int num, struct btrfs_key *di_key,
4619                         const char *name, int name_len,
4620                         const char *data, int data_len,
4621                         u8 type, void *vctx)
4622 {
4623         struct find_xattr_ctx *ctx = vctx;
4624 
4625         if (name_len == ctx->name_len &&
4626             strncmp(name, ctx->name, name_len) == 0) {
4627                 ctx->found_idx = num;
4628                 ctx->found_data_len = data_len;
4629                 ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
4630                 if (!ctx->found_data)
4631                         return -ENOMEM;
4632                 return 1;
4633         }
4634         return 0;
4635 }
4636 
4637 static int find_xattr(struct btrfs_root *root,
4638                       struct btrfs_path *path,
4639                       struct btrfs_key *key,
4640                       const char *name, int name_len,
4641                       char **data, int *data_len)
4642 {
4643         int ret;
4644         struct find_xattr_ctx ctx;
4645 
4646         ctx.name = name;
4647         ctx.name_len = name_len;
4648         ctx.found_idx = -1;
4649         ctx.found_data = NULL;
4650         ctx.found_data_len = 0;
4651 
4652         ret = iterate_dir_item(root, path, __find_xattr, &ctx);
4653         if (ret < 0)
4654                 return ret;
4655 
4656         if (ctx.found_idx == -1)
4657                 return -ENOENT;
4658         if (data) {
4659                 *data = ctx.found_data;
4660                 *data_len = ctx.found_data_len;
4661         } else {
4662                 kfree(ctx.found_data);
4663         }
4664         return ctx.found_idx;
4665 }
4666 
4667 
4668 static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
4669                                        const char *name, int name_len,
4670                                        const char *data, int data_len,
4671                                        u8 type, void *ctx)
4672 {
4673         int ret;
4674         struct send_ctx *sctx = ctx;
4675         char *found_data = NULL;
4676         int found_data_len  = 0;
4677 
4678         ret = find_xattr(sctx->parent_root, sctx->right_path,
4679                          sctx->cmp_key, name, name_len, &found_data,
4680                          &found_data_len);
4681         if (ret == -ENOENT) {
4682                 ret = __process_new_xattr(num, di_key, name, name_len, data,
4683                                 data_len, type, ctx);
4684         } else if (ret >= 0) {
4685                 if (data_len != found_data_len ||
4686                     memcmp(data, found_data, data_len)) {
4687                         ret = __process_new_xattr(num, di_key, name, name_len,
4688                                         data, data_len, type, ctx);
4689                 } else {
4690                         ret = 0;
4691                 }
4692         }
4693 
4694         kfree(found_data);
4695         return ret;
4696 }
4697 
4698 static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
4699                                            const char *name, int name_len,
4700                                            const char *data, int data_len,
4701                                            u8 type, void *ctx)
4702 {
4703         int ret;
4704         struct send_ctx *sctx = ctx;
4705 
4706         ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
4707                          name, name_len, NULL, NULL);
4708         if (ret == -ENOENT)
4709                 ret = __process_deleted_xattr(num, di_key, name, name_len, data,
4710                                 data_len, type, ctx);
4711         else if (ret >= 0)
4712                 ret = 0;
4713 
4714         return ret;
4715 }
4716 
4717 static int process_changed_xattr(struct send_ctx *sctx)
4718 {
4719         int ret = 0;
4720 
4721         ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4722                         __process_changed_new_xattr, sctx);
4723         if (ret < 0)
4724                 goto out;
4725         ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4726                         __process_changed_deleted_xattr, sctx);
4727 
4728 out:
4729         return ret;
4730 }
4731 
/*
 * Walk all XATTR_ITEM items of the current inode in the send root and emit
 * a SET_XATTR command for each xattr.  Used when the whole inode is new.
 */
static int process_all_new_xattrs(struct send_ctx *sctx)
{
        int ret;
        struct btrfs_root *root;
        struct btrfs_path *path;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *eb;
        int slot;

        path = alloc_path_for_send();
        if (!path)
                return -ENOMEM;

        root = sctx->send_root;

        /* Start at the first possible xattr item of the inode. */
        key.objectid = sctx->cmp_key->objectid;
        key.type = BTRFS_XATTR_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto out;

        while (1) {
                eb = path->nodes[0];
                slot = path->slots[0];
                /* Exhausted the current leaf, advance to the next one. */
                if (slot >= btrfs_header_nritems(eb)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                ret = 0;
                                break;
                        }
                        continue;
                }

                btrfs_item_key_to_cpu(eb, &found_key, slot);
                /* Stop once we leave this inode's xattr items. */
                if (found_key.objectid != key.objectid ||
                    found_key.type != key.type) {
                        ret = 0;
                        goto out;
                }

                ret = iterate_dir_item(root, path, __process_new_xattr, sctx);
                if (ret < 0)
                        goto out;

                path->slots[0]++;
        }

out:
        btrfs_free_path(path);
        return ret;
}
4787 
4788 static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
4789 {
4790         struct btrfs_root *root = sctx->send_root;
4791         struct btrfs_fs_info *fs_info = root->fs_info;
4792         struct inode *inode;
4793         struct page *page;
4794         char *addr;
4795         struct btrfs_key key;
4796         pgoff_t index = offset >> PAGE_SHIFT;
4797         pgoff_t last_index;
4798         unsigned pg_offset = offset_in_page(offset);
4799         ssize_t ret = 0;
4800 
4801         key.objectid = sctx->cur_ino;
4802         key.type = BTRFS_INODE_ITEM_KEY;
4803         key.offset = 0;
4804 
4805         inode = btrfs_iget(fs_info->sb, &key, root, NULL);
4806         if (IS_ERR(inode))
4807                 return PTR_ERR(inode);
4808 
4809         if (offset + len > i_size_read(inode)) {
4810                 if (offset > i_size_read(inode))
4811                         len = 0;
4812                 else
4813                         len = offset - i_size_read(inode);
4814         }
4815         if (len == 0)
4816                 goto out;
4817 
4818         last_index = (offset + len - 1) >> PAGE_SHIFT;
4819 
4820         /* initial readahead */
4821         memset(&sctx->ra, 0, sizeof(struct file_ra_state));
4822         file_ra_state_init(&sctx->ra, inode->i_mapping);
4823 
4824         while (index <= last_index) {
4825                 unsigned cur_len = min_t(unsigned, len,
4826                                          PAGE_SIZE - pg_offset);
4827 
4828                 page = find_lock_page(inode->i_mapping, index);
4829                 if (!page) {
4830                         page_cache_sync_readahead(inode->i_mapping, &sctx->ra,
4831                                 NULL, index, last_index + 1 - index);
4832 
4833                         page = find_or_create_page(inode->i_mapping, index,
4834                                         GFP_KERNEL);
4835                         if (!page) {
4836                                 ret = -ENOMEM;
4837                                 break;
4838                         }
4839                 }
4840 
4841                 if (PageReadahead(page)) {
4842                         page_cache_async_readahead(inode->i_mapping, &sctx->ra,
4843                                 NULL, page, index, last_index + 1 - index);
4844                 }
4845 
4846                 if (!PageUptodate(page)) {
4847                         btrfs_readpage(NULL, page);
4848                         lock_page(page);
4849                         if (!PageUptodate(page)) {
4850                                 unlock_page(page);
4851                                 put_page(page);
4852                                 ret = -EIO;
4853                                 break;
4854                         }
4855                 }
4856 
4857                 addr = kmap(page);
4858                 memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
4859                 kunmap(page);
4860                 unlock_page(page);
4861                 put_page(page);
4862                 index++;
4863                 pg_offset = 0;
4864                 len -= cur_len;
4865                 ret += cur_len;
4866         }
4867 out:
4868         iput(inode);
4869         return ret;
4870 }
4871 
4872 /*
4873  * Read some bytes from the current inode/file and send a write command to
4874  * user space.
4875  */
4876 static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
4877 {
4878         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
4879         int ret = 0;
4880         struct fs_path *p;
4881         ssize_t num_read = 0;
4882 
4883         p = fs_path_alloc();
4884         if (!p)
4885                 return -ENOMEM;
4886 
4887         btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len);
4888 
4889         num_read = fill_read_buf(sctx, offset, len);
4890         if (num_read <= 0) {
4891                 if (num_read < 0)
4892                         ret = num_read;
4893                 goto out;
4894         }
4895 
4896         ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
4897         if (ret < 0)
4898                 goto out;
4899 
4900         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4901         if (ret < 0)
4902                 goto out;
4903 
4904         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4905         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4906         TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
4907 
4908         ret = send_cmd(sctx);
4909 
4910 tlv_put_failure:
4911 out:
4912         fs_path_free(p);
4913         if (ret < 0)
4914                 return ret;
4915         return num_read;
4916 }
4917 
4918 /*
4919  * Send a clone command to user space.
4920  */
4921 static int send_clone(struct send_ctx *sctx,
4922                       u64 offset, u32 len,
4923                       struct clone_root *clone_root)
4924 {
4925         int ret = 0;
4926         struct fs_path *p;
4927         u64 gen;
4928 
4929         btrfs_debug(sctx->send_root->fs_info,
4930                     "send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu",
4931                     offset, len, clone_root->root->root_key.objectid,
4932                     clone_root->ino, clone_root->offset);
4933 
4934         p = fs_path_alloc();
4935         if (!p)
4936                 return -ENOMEM;
4937 
4938         ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
4939         if (ret < 0)
4940                 goto out;
4941 
4942         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4943         if (ret < 0)
4944                 goto out;
4945 
4946         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4947         TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
4948         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4949 
4950         if (clone_root->root == sctx->send_root) {
4951                 ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
4952                                 &gen, NULL, NULL, NULL, NULL);
4953                 if (ret < 0)
4954                         goto out;
4955                 ret = get_cur_path(sctx, clone_root->ino, gen, p);
4956         } else {
4957                 ret = get_inode_path(clone_root->root, clone_root->ino, p);
4958         }
4959         if (ret < 0)
4960                 goto out;
4961 
4962         /*
4963          * If the parent we're using has a received_uuid set then use that as
4964          * our clone source as that is what we will look for when doing a
4965          * receive.
4966          *
4967          * This covers the case that we create a snapshot off of a received
4968          * subvolume and then use that as the parent and try to receive on a
4969          * different host.
4970          */
4971         if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
4972                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
4973                              clone_root->root->root_item.received_uuid);
4974         else
4975                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
4976                              clone_root->root->root_item.uuid);
4977         TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
4978                     le64_to_cpu(clone_root->root->root_item.ctransid));
4979         TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
4980         TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
4981                         clone_root->offset);
4982 
4983         ret = send_cmd(sctx);
4984 
4985 tlv_put_failure:
4986 out:
4987         fs_path_free(p);
4988         return ret;
4989 }
4990 
/*
 * Send an update extent command to user space.
 *
 * Used when the stream carries no file data (BTRFS_SEND_FLAG_NO_FILE_DATA):
 * instead of write commands the receiver is only told that the range
 * [offset, offset + len) of the current inode changed.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int send_update_extent(struct send_ctx *sctx,
			      u64 offset, u32 len)
{
	int ret = 0;
	struct fs_path *p;

	p = fs_path_alloc();
	if (!p)
		return -ENOMEM;

	ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
	if (ret < 0)
		goto out;

	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
	if (ret < 0)
		goto out;

	/* The TLV_PUT* macros jump to tlv_put_failure on error. */
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	fs_path_free(p);
	return ret;
}
5023 
/*
 * Send zeroes for a hole in the current inode, covering the range from
 * sctx->cur_inode_last_extent up to @end (capped at i_size).
 *
 * The send stream has no hole-punch command yet, so the hole is materialized
 * as one or more write commands filled with zeroes — unless the stream omits
 * file data (BTRFS_SEND_FLAG_NO_FILE_DATA), in which case a single
 * update_extent command is sent instead.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int send_hole(struct send_ctx *sctx, u64 end)
{
	struct fs_path *p = NULL;
	u64 offset = sctx->cur_inode_last_extent;
	u64 len;
	int ret = 0;

	/*
	 * A hole that starts at EOF or beyond it. Since we do not yet support
	 * fallocate (for extent preallocation and hole punching), sending a
	 * write of zeroes starting at EOF or beyond would later require issuing
	 * a truncate operation which would undo the write and achieve nothing.
	 */
	if (offset >= sctx->cur_inode_size)
		return 0;

	/*
	 * Don't go beyond the inode's i_size due to prealloc extents that start
	 * after the i_size.
	 */
	end = min_t(u64, end, sctx->cur_inode_size);

	if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
		return send_update_extent(sctx, offset, end - offset);

	p = fs_path_alloc();
	if (!p)
		return -ENOMEM;
	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
	if (ret < 0)
		goto tlv_put_failure;
	/* Reuse the context's read buffer as a zero-filled write source. */
	memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
	while (offset < end) {
		len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);

		ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
		if (ret < 0)
			break;
		/* The TLV_PUT* macros jump to tlv_put_failure on error. */
		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
		TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
		TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
		ret = send_cmd(sctx);
		if (ret < 0)
			break;
		offset += len;
	}
	sctx->cur_inode_next_write_offset = offset;
tlv_put_failure:
	fs_path_free(p);
	return ret;
}
5075 
5076 static int send_extent_data(struct send_ctx *sctx,
5077                             const u64 offset,
5078                             const u64 len)
5079 {
5080         u64 sent = 0;
5081 
5082         if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
5083                 return send_update_extent(sctx, offset, len);
5084 
5085         while (sent < len) {
5086                 u64 size = len - sent;
5087                 int ret;
5088 
5089                 if (size > BTRFS_SEND_READ_SIZE)
5090                         size = BTRFS_SEND_READ_SIZE;
5091                 ret = send_write(sctx, offset + sent, size);
5092                 if (ret < 0)
5093                         return ret;
5094                 if (!ret)
5095                         break;
5096                 sent += ret;
5097         }
5098         return 0;
5099 }
5100 
/*
 * Clone as much as possible of the file range [offset, offset + len) of the
 * inode currently being sent from the extents of the clone source
 * (clone_root->ino, starting at clone_root->offset) that share the same disk
 * extent (disk_byte/data_offset). Parts of the range that cannot be cloned
 * (holes, different extents, data beyond the source's i_size, unaligned eof
 * blocks) are sent as regular write commands via send_extent_data().
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int clone_range(struct send_ctx *sctx,
                       struct clone_root *clone_root,
                       const u64 disk_byte,
                       u64 data_offset,
                       u64 offset,
                       u64 len)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        int ret;
        /* i_size of the clone source inode, looked up below. */
        u64 clone_src_i_size = 0;

        /*
         * Prevent cloning from a zero offset with a length matching the sector
         * size because in some scenarios this will make the receiver fail.
         *
         * For example, if in the source filesystem the extent at offset 0
         * has a length of sectorsize and it was written using direct IO, then
         * it can never be an inline extent (even if compression is enabled).
         * Then this extent can be cloned in the original filesystem to a non
         * zero file offset, but it may not be possible to clone in the
         * destination filesystem because it can be inlined due to compression
         * on the destination filesystem (as the receiver's write operations are
         * always done using buffered IO). The same happens when the original
         * filesystem does not have compression enabled but the destination
         * filesystem has.
         */
        if (clone_root->offset == 0 &&
            len == sctx->send_root->fs_info->sectorsize)
                return send_extent_data(sctx, offset, len);

        path = alloc_path_for_send();
        if (!path)
                return -ENOMEM;

        /*
         * There are inodes that have extents that lie behind its i_size. Don't
         * accept clones from these extents.
         */
        ret = __get_inode_info(clone_root->root, path, clone_root->ino,
                               &clone_src_i_size, NULL, NULL, NULL, NULL, NULL);
        btrfs_release_path(path);
        if (ret < 0)
                goto out;

        /*
         * We can't send a clone operation for the entire range if we find
         * extent items in the respective range in the source file that
         * refer to different extents or if we find holes.
         * So check for that and do a mix of clone and regular write/copy
         * operations if needed.
         *
         * Example:
         *
         * mkfs.btrfs -f /dev/sda
         * mount /dev/sda /mnt
         * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
         * cp --reflink=always /mnt/foo /mnt/bar
         * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
         * btrfs subvolume snapshot -r /mnt /mnt/snap
         *
         * If when we send the snapshot and we are processing file bar (which
         * has a higher inode number than foo) we blindly send a clone operation
         * for the [0, 100K[ range from foo to bar, the receiver ends up getting
         * a file bar that matches the content of file foo - iow, doesn't match
         * the content from bar in the original filesystem.
         */
        key.objectid = clone_root->ino;
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = clone_root->offset;
        ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
        if (ret < 0)
                goto out;
        if (ret > 0 && path->slots[0] > 0) {
                /*
                 * The search landed past the wanted offset; step back one slot
                 * in case the previous extent item covers our start offset.
                 */
                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
                if (key.objectid == clone_root->ino &&
                    key.type == BTRFS_EXTENT_DATA_KEY)
                        path->slots[0]--;
        }

        /* Walk the source's extent items covering the requested range. */
        while (true) {
                struct extent_buffer *leaf = path->nodes[0];
                int slot = path->slots[0];
                struct btrfs_file_extent_item *ei;
                u8 type;
                u64 ext_len;
                u64 clone_len;
                u64 clone_data_offset;

                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(clone_root->root, path);
                        if (ret < 0)
                                goto out;
                        else if (ret > 0)
                                break;
                        continue;
                }

                btrfs_item_key_to_cpu(leaf, &key, slot);

                /*
                 * We might have an implicit trailing hole (NO_HOLES feature
                 * enabled). We deal with it after leaving this loop.
                 */
                if (key.objectid != clone_root->ino ||
                    key.type != BTRFS_EXTENT_DATA_KEY)
                        break;

                ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
                type = btrfs_file_extent_type(leaf, ei);
                if (type == BTRFS_FILE_EXTENT_INLINE) {
                        ext_len = btrfs_file_extent_ram_bytes(leaf, ei);
                        ext_len = PAGE_ALIGN(ext_len);
                } else {
                        ext_len = btrfs_file_extent_num_bytes(leaf, ei);
                }

                /* Extent ends before the range we care about, skip it. */
                if (key.offset + ext_len <= clone_root->offset)
                        goto next;

                if (key.offset > clone_root->offset) {
                        /* Implicit hole, NO_HOLES feature enabled. */
                        u64 hole_len = key.offset - clone_root->offset;

                        if (hole_len > len)
                                hole_len = len;
                        ret = send_extent_data(sctx, offset, hole_len);
                        if (ret < 0)
                                goto out;

                        len -= hole_len;
                        if (len == 0)
                                break;
                        offset += hole_len;
                        clone_root->offset += hole_len;
                        data_offset += hole_len;
                }

                if (key.offset >= clone_root->offset + len)
                        break;

                /* Never clone from beyond the source inode's i_size. */
                if (key.offset >= clone_src_i_size)
                        break;

                if (key.offset + ext_len > clone_src_i_size)
                        ext_len = clone_src_i_size - key.offset;

                clone_data_offset = btrfs_file_extent_offset(leaf, ei);
                if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
                        clone_root->offset = key.offset;
                        if (clone_data_offset < data_offset &&
                                clone_data_offset + ext_len > data_offset) {
                                u64 extent_offset;

                                extent_offset = data_offset - clone_data_offset;
                                ext_len -= extent_offset;
                                clone_data_offset += extent_offset;
                                clone_root->offset += extent_offset;
                        }
                }

                clone_len = min_t(u64, ext_len, len);

                if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
                    clone_data_offset == data_offset) {
                        const u64 src_end = clone_root->offset + clone_len;
                        const u64 sectorsize = SZ_64K;

                        /*
                         * We can't clone the last block, when its size is not
                         * sector size aligned, into the middle of a file. If we
                         * do so, the receiver will get a failure (-EINVAL) when
                         * trying to clone or will silently corrupt the data in
                         * the destination file if it's on a kernel without the
                         * fix introduced by commit ac765f83f1397646
                         * ("Btrfs: fix data corruption due to cloning of eof
                         * block").
                         *
                         * So issue a clone of the aligned down range plus a
                         * regular write for the eof block, if we hit that case.
                         *
                         * Also, we use the maximum possible sector size, 64K,
                         * because we don't know what's the sector size of the
                         * filesystem that receives the stream, so we have to
                         * assume the largest possible sector size.
                         */
                        if (src_end == clone_src_i_size &&
                            !IS_ALIGNED(src_end, sectorsize) &&
                            offset + clone_len < sctx->cur_inode_size) {
                                u64 slen;

                                slen = ALIGN_DOWN(src_end - clone_root->offset,
                                                  sectorsize);
                                if (slen > 0) {
                                        ret = send_clone(sctx, offset, slen,
                                                         clone_root);
                                        if (ret < 0)
                                                goto out;
                                }
                                ret = send_extent_data(sctx, offset + slen,
                                                       clone_len - slen);
                        } else {
                                ret = send_clone(sctx, offset, clone_len,
                                                 clone_root);
                        }
                } else {
                        /* Different extent or data offset: fall back to writes. */
                        ret = send_extent_data(sctx, offset, clone_len);
                }

                if (ret < 0)
                        goto out;

                len -= clone_len;
                if (len == 0)
                        break;
                offset += clone_len;
                clone_root->offset += clone_len;
                data_offset += clone_len;
next:
                path->slots[0]++;
        }

        /*
         * Whatever remains past the last extent item (e.g. an implicit
         * trailing hole with NO_HOLES) is sent as regular writes.
         */
        if (len > 0)
                ret = send_extent_data(sctx, offset, len);
        else
                ret = 0;
out:
        btrfs_free_path(path);
        return ret;
}
5331 
/*
 * Emit the data of the file extent item at @path/@key for the current inode,
 * either as clone commands (when a clone source was found and the range end
 * is block aligned) or as regular write commands.
 *
 * On success, sctx->cur_inode_next_write_offset is advanced past the range
 * that was sent.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int send_write_or_clone(struct send_ctx *sctx,
                               struct btrfs_path *path,
                               struct btrfs_key *key,
                               struct clone_root *clone_root)
{
        int ret = 0;
        struct btrfs_file_extent_item *ei;
        u64 offset = key->offset;
        u64 len;
        u8 type;
        u64 bs = sctx->send_root->fs_info->sb->s_blocksize;

        ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
                        struct btrfs_file_extent_item);
        type = btrfs_file_extent_type(path->nodes[0], ei);
        if (type == BTRFS_FILE_EXTENT_INLINE) {
                len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
                /*
                 * it is possible the inline item won't cover the whole page,
                 * but there may be items after this page.  Make
                 * sure to send the whole thing
                 */
                len = PAGE_ALIGN(len);
        } else {
                len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
        }

        /* Nothing to send for extents starting at or beyond EOF. */
        if (offset >= sctx->cur_inode_size) {
                ret = 0;
                goto out;
        }
        /* Clamp the range to the inode's i_size. */
        if (offset + len > sctx->cur_inode_size)
                len = sctx->cur_inode_size - offset;
        if (len == 0) {
                ret = 0;
                goto out;
        }

        /* Cloning requires the end of the range to be block aligned. */
        if (clone_root && IS_ALIGNED(offset + len, bs)) {
                u64 disk_byte;
                u64 data_offset;

                disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
                data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
                ret = clone_range(sctx, clone_root, disk_byte, data_offset,
                                  offset, len);
        } else {
                ret = send_extent_data(sctx, offset, len);
        }
        sctx->cur_inode_next_write_offset = offset + len;
out:
        return ret;
}
5385 
/*
 * Check whether the extent item at @left_path/@ekey in the send root covers
 * exactly the same data as the corresponding file range in the parent root,
 * by walking the parent's extent items over that range and comparing disk
 * bytenr, (adjusted) extent offset and generation.
 *
 * Returns 1 if the extent is unchanged (nothing needs to be sent for it),
 * 0 if it changed, and a negative errno on failure.
 */
static int is_extent_unchanged(struct send_ctx *sctx,
                               struct btrfs_path *left_path,
                               struct btrfs_key *ekey)
{
        int ret = 0;
        struct btrfs_key key;
        struct btrfs_path *path = NULL;
        struct extent_buffer *eb;
        int slot;
        struct btrfs_key found_key;
        struct btrfs_file_extent_item *ei;
        u64 left_disknr;
        u64 right_disknr;
        u64 left_offset;
        u64 right_offset;
        u64 left_offset_fixed;
        u64 left_len;
        u64 right_len;
        u64 left_gen;
        u64 right_gen;
        u8 left_type;
        u8 right_type;

        path = alloc_path_for_send();
        if (!path)
                return -ENOMEM;

        eb = left_path->nodes[0];
        slot = left_path->slots[0];
        ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
        left_type = btrfs_file_extent_type(eb, ei);

        /* Only regular extents can be compared; anything else is "changed". */
        if (left_type != BTRFS_FILE_EXTENT_REG) {
                ret = 0;
                goto out;
        }
        left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
        left_len = btrfs_file_extent_num_bytes(eb, ei);
        left_offset = btrfs_file_extent_offset(eb, ei);
        left_gen = btrfs_file_extent_generation(eb, ei);

        /*
         * Following comments will refer to these graphics. L is the left
         * extents which we are checking at the moment. 1-8 are the right
         * extents that we iterate.
         *
         *       |-----L-----|
         * |-1-|-2a-|-3-|-4-|-5-|-6-|
         *
         *       |-----L-----|
         * |--1--|-2b-|...(same as above)
         *
         * Alternative situation. Happens on files where extents got split.
         *       |-----L-----|
         * |-----------7-----------|-6-|
         *
         * Alternative situation. Happens on files which got larger.
         *       |-----L-----|
         * |-8-|
         * Nothing follows after 8.
         */

        key.objectid = ekey->objectid;
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = ekey->offset;
        ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
        if (ret < 0)
                goto out;
        if (ret) {
                ret = 0;
                goto out;
        }

        /*
         * Handle special case where the right side has no extents at all.
         */
        eb = path->nodes[0];
        slot = path->slots[0];
        btrfs_item_key_to_cpu(eb, &found_key, slot);
        if (found_key.objectid != key.objectid ||
            found_key.type != key.type) {
                /* If we're a hole then just pretend nothing changed */
                ret = (left_disknr) ? 0 : 1;
                goto out;
        }

        /*
         * We're now on 2a, 2b or 7.
         */
        key = found_key;
        while (key.offset < ekey->offset + left_len) {
                ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
                right_type = btrfs_file_extent_type(eb, ei);
                if (right_type != BTRFS_FILE_EXTENT_REG &&
                    right_type != BTRFS_FILE_EXTENT_INLINE) {
                        ret = 0;
                        goto out;
                }

                if (right_type == BTRFS_FILE_EXTENT_INLINE) {
                        right_len = btrfs_file_extent_ram_bytes(eb, ei);
                        right_len = PAGE_ALIGN(right_len);
                } else {
                        right_len = btrfs_file_extent_num_bytes(eb, ei);
                }

                /*
                 * Are we at extent 8? If yes, we know the extent is changed.
                 * This may only happen on the first iteration.
                 */
                if (found_key.offset + right_len <= ekey->offset) {
                        /* If we're a hole just pretend nothing changed */
                        ret = (left_disknr) ? 0 : 1;
                        goto out;
                }

                /*
                 * We just wanted to see if when we have an inline extent, what
                 * follows it is a regular extent (wanted to check the above
                 * condition for inline extents too). This should normally not
                 * happen but it's possible for example when we have an inline
                 * compressed extent representing data with a size matching
                 * the page size (currently the same as sector size).
                 */
                if (right_type == BTRFS_FILE_EXTENT_INLINE) {
                        ret = 0;
                        goto out;
                }

                right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
                right_offset = btrfs_file_extent_offset(eb, ei);
                right_gen = btrfs_file_extent_generation(eb, ei);

                left_offset_fixed = left_offset;
                if (key.offset < ekey->offset) {
                        /* Fix the right offset for 2a and 7. */
                        right_offset += ekey->offset - key.offset;
                } else {
                        /* Fix the left offset for all behind 2a and 2b */
                        left_offset_fixed += key.offset - ekey->offset;
                }

                /*
                 * Check if we have the same extent.
                 */
                if (left_disknr != right_disknr ||
                    left_offset_fixed != right_offset ||
                    left_gen != right_gen) {
                        ret = 0;
                        goto out;
                }

                /*
                 * Go to the next extent.
                 */
                ret = btrfs_next_item(sctx->parent_root, path);
                if (ret < 0)
                        goto out;
                if (!ret) {
                        eb = path->nodes[0];
                        slot = path->slots[0];
                        btrfs_item_key_to_cpu(eb, &found_key, slot);
                }
                if (ret || found_key.objectid != key.objectid ||
                    found_key.type != key.type) {
                        key.offset += right_len;
                        break;
                }
                /* A gap between extents on the right side means a change. */
                if (found_key.offset != key.offset + right_len) {
                        ret = 0;
                        goto out;
                }
                key = found_key;
        }

        /*
         * We're now behind the left extent (treat as unchanged) or at the end
         * of the right side (treat as changed).
         */
        if (key.offset >= ekey->offset + left_len)
                ret = 1;
        else
                ret = 0;


out:
        btrfs_free_path(path);
        return ret;
}
5575 
/*
 * Find the extent item of the current inode at or before @offset in the send
 * root and store its end offset in sctx->cur_inode_last_extent (0 if no such
 * extent item exists). Used by maybe_send_hole() to detect gaps between
 * consecutive extent items.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int get_last_extent(struct send_ctx *sctx, u64 offset)
{
        struct btrfs_path *path;
        struct btrfs_root *root = sctx->send_root;
        struct btrfs_file_extent_item *fi;
        struct btrfs_key key;
        u64 extent_end;
        u8 type;
        int ret;

        path = alloc_path_for_send();
        if (!path)
                return -ENOMEM;

        sctx->cur_inode_last_extent = 0;

        key.objectid = sctx->cur_ino;
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = offset;
        ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
        if (ret < 0)
                goto out;
        ret = 0;
        btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
        /* No extent item for this inode at or before @offset. */
        if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
                goto out;

        fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
                            struct btrfs_file_extent_item);
        type = btrfs_file_extent_type(path->nodes[0], fi);
        if (type == BTRFS_FILE_EXTENT_INLINE) {
                /* Inline extents are padded up to the sector size. */
                u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
                extent_end = ALIGN(key.offset + size,
                                   sctx->send_root->fs_info->sectorsize);
        } else {
                extent_end = key.offset +
                        btrfs_file_extent_num_bytes(path->nodes[0], fi);
        }
        sctx->cur_inode_last_extent = extent_end;
out:
        btrfs_free_path(path);
        return ret;
}
5619 
/*
 * Check whether the file range [start, end) of the current inode is a hole
 * in the parent snapshot, i.e. contains no allocated extents. Extent items
 * with a zero disk bytenr (explicit holes) count as holes too.
 *
 * Returns 1 if the whole range is a hole, 0 if any part of it is covered by
 * an allocated extent, and a negative errno on failure.
 */
static int range_is_hole_in_parent(struct send_ctx *sctx,
                                   const u64 start,
                                   const u64 end)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        struct btrfs_root *root = sctx->parent_root;
        u64 search_start = start;
        int ret;

        path = alloc_path_for_send();
        if (!path)
                return -ENOMEM;

        key.objectid = sctx->cur_ino;
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = search_start;
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto out;
        /* Step back one slot: the previous item may cover @start. */
        if (ret > 0 && path->slots[0] > 0)
                path->slots[0]--;

        while (search_start < end) {
                struct extent_buffer *leaf = path->nodes[0];
                int slot = path->slots[0];
                struct btrfs_file_extent_item *fi;
                u64 extent_end;

                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0)
                                goto out;
                        else if (ret > 0)
                                break;
                        continue;
                }

                btrfs_item_key_to_cpu(leaf, &key, slot);
                /* Items before our inode/extent-data keys: keep scanning. */
                if (key.objectid < sctx->cur_ino ||
                    key.type < BTRFS_EXTENT_DATA_KEY)
                        goto next;
                /* Past the inode, the extent data items or the range: done. */
                if (key.objectid > sctx->cur_ino ||
                    key.type > BTRFS_EXTENT_DATA_KEY ||
                    key.offset >= end)
                        break;

                fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
                if (btrfs_file_extent_type(leaf, fi) ==
                    BTRFS_FILE_EXTENT_INLINE) {
                        u64 size = btrfs_file_extent_ram_bytes(leaf, fi);

                        extent_end = ALIGN(key.offset + size,
                                           root->fs_info->sectorsize);
                } else {
                        extent_end = key.offset +
                                btrfs_file_extent_num_bytes(leaf, fi);
                }
                /* Extent ends before the range we care about, skip it. */
                if (extent_end <= start)
                        goto next;
                /* A zero disk bytenr is an explicit hole, keep scanning. */
                if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
                        search_start = extent_end;
                        goto next;
                }
                /* Found an allocated extent inside the range: not a hole. */
                ret = 0;
                goto out;
next:
                path->slots[0]++;
        }
        ret = 1;
out:
        btrfs_free_path(path);
        return ret;
}
5694 
/*
 * If there is a gap between the end of the previously processed extent of
 * the current inode and the file extent item at @path/@key, and that gap is
 * not already a hole in the parent snapshot, send zeroes for it. Finally,
 * record the end of the current extent in sctx->cur_inode_last_extent.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
                           struct btrfs_key *key)
{
        struct btrfs_file_extent_item *fi;
        u64 extent_end;
        u8 type;
        int ret = 0;

        /* Holes only matter for the current inode and incremental sends. */
        if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
                return 0;

        /* Lazily initialize the last extent end ((u64)-1 means unknown). */
        if (sctx->cur_inode_last_extent == (u64)-1) {
                ret = get_last_extent(sctx, key->offset - 1);
                if (ret)
                        return ret;
        }

        fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
                            struct btrfs_file_extent_item);
        type = btrfs_file_extent_type(path->nodes[0], fi);
        if (type == BTRFS_FILE_EXTENT_INLINE) {
                /* Inline extents are padded up to the sector size. */
                u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
                extent_end = ALIGN(key->offset + size,
                                   sctx->send_root->fs_info->sectorsize);
        } else {
                extent_end = key->offset +
                        btrfs_file_extent_num_bytes(path->nodes[0], fi);
        }

        if (path->slots[0] == 0 &&
            sctx->cur_inode_last_extent < key->offset) {
                /*
                 * We might have skipped entire leafs that contained only
                 * file extent items for our current inode. These leafs have
                 * a generation number smaller (older) than the one in the
                 * current leaf and the leaf our last extent came from, and
                 * are located between these 2 leafs.
                 */
                ret = get_last_extent(sctx, key->offset - 1);
                if (ret)
                        return ret;
        }

        if (sctx->cur_inode_last_extent < key->offset) {
                /* Only send zeroes if the parent doesn't have a hole there. */
                ret = range_is_hole_in_parent(sctx,
                                              sctx->cur_inode_last_extent,
                                              key->offset);
                if (ret < 0)
                        return ret;
                else if (ret == 0)
                        ret = send_hole(sctx, key->offset);
                else
                        ret = 0;
        }
        sctx->cur_inode_last_extent = extent_end;
        return ret;
}
5752 
/*
 * Process a single file extent item of the current inode: skip it when it
 * is unchanged relative to the parent snapshot, a hole, or a preallocated
 * extent; otherwise emit a write or clone command. In either case let
 * maybe_send_hole() account for any gap preceding this extent.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int process_extent(struct send_ctx *sctx,
			  struct btrfs_path *path,
			  struct btrfs_key *key)
{
	struct clone_root *found_clone = NULL;
	int ret = 0;

	/* Symlink targets are sent via the symlink command, not as data. */
	if (S_ISLNK(sctx->cur_inode_mode))
		return 0;

	if (sctx->parent_root && !sctx->cur_inode_new) {
		ret = is_extent_unchanged(sctx, path, key);
		if (ret < 0)
			goto out;
		if (ret) {
			/* Unchanged data: still track it for hole detection. */
			ret = 0;
			goto out_hole;
		}
	} else {
		struct btrfs_file_extent_item *ei;
		u8 type;

		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_file_extent_item);
		type = btrfs_file_extent_type(path->nodes[0], ei);
		if (type == BTRFS_FILE_EXTENT_PREALLOC ||
		    type == BTRFS_FILE_EXTENT_REG) {
			/*
			 * The send spec does not have a prealloc command yet,
			 * so just leave a hole for prealloc'ed extents until
			 * we have enough commands queued up to justify rev'ing
			 * the send spec.
			 */
			if (type == BTRFS_FILE_EXTENT_PREALLOC) {
				ret = 0;
				goto out;
			}

			/* Have a hole, just skip it. */
			if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
				ret = 0;
				goto out;
			}
		}
	}

	/* Try to find a clone source; -ENOENT just means none was found. */
	ret = find_extent_clone(sctx, path, key->objectid, key->offset,
			sctx->cur_inode_size, &found_clone);
	if (ret != -ENOENT && ret < 0)
		goto out;

	ret = send_write_or_clone(sctx, path, key, found_clone);
	if (ret)
		goto out;
out_hole:
	ret = maybe_send_hole(sctx, path, key);
out:
	return ret;
}
5812 
/*
 * Iterate over all file extent items of the current inode in the send
 * tree and process each one. Used when the inode is treated as entirely
 * new (e.g. its inode number was reused with a new generation).
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int process_all_extents(struct send_ctx *sctx)
{
	int ret;
	struct btrfs_root *root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
	int slot;

	root = sctx->send_root;
	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	/* Position at the first EXTENT_DATA item of the inode. */
	key.objectid = sctx->cmp_key->objectid;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		eb = path->nodes[0];
		slot = path->slots[0];

		if (slot >= btrfs_header_nritems(eb)) {
			/* Exhausted this leaf, advance to the next one. */
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				/* No more leafs: done. */
				ret = 0;
				break;
			}
			continue;
		}

		btrfs_item_key_to_cpu(eb, &found_key, slot);

		/* Stop once we leave this inode's extent items. */
		if (found_key.objectid != key.objectid ||
		    found_key.type != key.type) {
			ret = 0;
			goto out;
		}

		ret = process_extent(sctx, path, &found_key);
		if (ret < 0)
			goto out;

		path->slots[0]++;
	}

out:
	btrfs_free_path(path);
	return ret;
}
5869 
5870 static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
5871                                            int *pending_move,
5872                                            int *refs_processed)
5873 {
5874         int ret = 0;
5875 
5876         if (sctx->cur_ino == 0)
5877                 goto out;
5878         if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
5879             sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
5880                 goto out;
5881         if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
5882                 goto out;
5883 
5884         ret = process_recorded_refs(sctx, pending_move);
5885         if (ret < 0)
5886                 goto out;
5887 
5888         *refs_processed = 1;
5889 out:
5890         return ret;
5891 }
5892 
/*
 * Emit the trailing commands for the current inode once all of its items
 * have been processed: pending ref operations (rename/link/unlink), a
 * trailing hole and truncate for regular files, chown/chmod when needed,
 * deferred child directory moves and finally the utimes update.
 *
 * @at_end: nonzero when called at the end of the whole send stream,
 *          zero when called because the comparison reached a new key.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
{
	int ret = 0;
	u64 left_mode;
	u64 left_uid;
	u64 left_gid;
	u64 right_mode;
	u64 right_uid;
	u64 right_gid;
	int need_chmod = 0;
	int need_chown = 0;
	int need_truncate = 1;
	int pending_move = 0;
	int refs_processed = 0;

	/* Inodes with nlink == 0 produce no commands at all. */
	if (sctx->ignore_cur_inode)
		return 0;

	ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
					      &refs_processed);
	if (ret < 0)
		goto out;

	/*
	 * We have processed the refs and thus need to advance send_progress.
	 * Now, calls to get_cur_xxx will take the updated refs of the current
	 * inode into account.
	 *
	 * On the other hand, if our current inode is a directory and couldn't
	 * be moved/renamed because its parent was renamed/moved too and it has
	 * a higher inode number, we can only move/rename our current inode
	 * after we moved/renamed its parent. Therefore in this case operate on
	 * the old path (pre move/rename) of our current inode, and the
	 * move/rename will be performed later.
	 */
	if (refs_processed && !pending_move)
		sctx->send_progress = sctx->cur_ino + 1;

	if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
		goto out;
	/* Still processing items of this inode; finish it later. */
	if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
		goto out;

	ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
			&left_mode, &left_uid, &left_gid, NULL);
	if (ret < 0)
		goto out;

	if (!sctx->parent_root || sctx->cur_inode_new) {
		/* Newly created inodes always need owner (and mode) set. */
		need_chown = 1;
		if (!S_ISLNK(sctx->cur_inode_mode))
			need_chmod = 1;
		if (sctx->cur_inode_next_write_offset == sctx->cur_inode_size)
			need_truncate = 0;
	} else {
		u64 old_size;

		ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
				&old_size, NULL, &right_mode, &right_uid,
				&right_gid, NULL);
		if (ret < 0)
			goto out;

		if (left_uid != right_uid || left_gid != right_gid)
			need_chown = 1;
		if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
			need_chmod = 1;
		/*
		 * No truncate needed when the size is unchanged, or when the
		 * file grew and the last write ended exactly at the new size.
		 */
		if ((old_size == sctx->cur_inode_size) ||
		    (sctx->cur_inode_size > old_size &&
		     sctx->cur_inode_next_write_offset == sctx->cur_inode_size))
			need_truncate = 0;
	}

	if (S_ISREG(sctx->cur_inode_mode)) {
		if (need_send_hole(sctx)) {
			/* Cover any hole between the last extent and EOF. */
			if (sctx->cur_inode_last_extent == (u64)-1 ||
			    sctx->cur_inode_last_extent <
			    sctx->cur_inode_size) {
				ret = get_last_extent(sctx, (u64)-1);
				if (ret)
					goto out;
			}
			if (sctx->cur_inode_last_extent <
			    sctx->cur_inode_size) {
				ret = send_hole(sctx, sctx->cur_inode_size);
				if (ret)
					goto out;
			}
		}
		if (need_truncate) {
			ret = send_truncate(sctx, sctx->cur_ino,
					    sctx->cur_inode_gen,
					    sctx->cur_inode_size);
			if (ret < 0)
				goto out;
		}
	}

	if (need_chown) {
		ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				left_uid, left_gid);
		if (ret < 0)
			goto out;
	}
	if (need_chmod) {
		ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				left_mode);
		if (ret < 0)
			goto out;
	}

	/*
	 * If other directory inodes depended on our current directory
	 * inode's move/rename, now do their move/rename operations.
	 */
	if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
		ret = apply_children_dir_moves(sctx);
		if (ret)
			goto out;
		/*
		 * Need to send that every time, no matter if it actually
		 * changed between the two trees as we have done changes to
		 * the inode before. If our inode is a directory and it's
		 * waiting to be moved/renamed, we will send its utimes when
		 * it's moved/renamed, therefore we don't need to do it here.
		 */
		sctx->send_progress = sctx->cur_ino + 1;
		ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
		if (ret < 0)
			goto out;
	}

out:
	return ret;
}
6028 
/*
 * Context passed through iterate_inode_ref() to record_parent_ref() while
 * collecting all paths of an inode found in the parent snapshot.
 */
struct parent_paths_ctx {
	struct list_head *refs;	/* list of recorded_ref entries to fill */
	struct send_ctx *sctx;	/* the ongoing send operation */
};
6033 
6034 static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
6035                              void *ctx)
6036 {
6037         struct parent_paths_ctx *ppctx = ctx;
6038 
6039         return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx,
6040                           ppctx->refs);
6041 }
6042 
/*
 * Issue unlink operations for all paths of the current inode found in the
 * parent snapshot.
 *
 * Used when the current inode has a link count of zero in the send
 * snapshot: every name it had in the parent snapshot must be removed on
 * the receiving side. Returns 0 on success or a negative errno.
 */
static int btrfs_unlink_all_paths(struct send_ctx *sctx)
{
	LIST_HEAD(deleted_refs);
	struct btrfs_path *path;
	struct btrfs_key key;
	struct parent_paths_ctx ctx;
	int ret;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	/* INODE_REF items sort before INODE_EXTREF items of the inode. */
	key.objectid = sctx->cur_ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	ctx.refs = &deleted_refs;
	ctx.sctx = sctx;

	/* First collect every full path of the inode from the parent tree. */
	while (true) {
		struct extent_buffer *eb = path->nodes[0];
		int slot = path->slots[0];

		if (slot >= btrfs_header_nritems(eb)) {
			ret = btrfs_next_leaf(sctx->parent_root, path);
			if (ret < 0)
				goto out;
			else if (ret > 0)
				break;
			continue;
		}

		btrfs_item_key_to_cpu(eb, &key, slot);
		if (key.objectid != sctx->cur_ino)
			break;
		if (key.type != BTRFS_INODE_REF_KEY &&
		    key.type != BTRFS_INODE_EXTREF_KEY)
			break;

		ret = iterate_inode_ref(sctx->parent_root, path, &key, 1,
					record_parent_ref, &ctx);
		if (ret < 0)
			goto out;

		path->slots[0]++;
	}

	/* Now emit an unlink command for each recorded path. */
	while (!list_empty(&deleted_refs)) {
		struct recorded_ref *ref;

		ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
		ret = send_unlink(sctx, ref->full_path);
		if (ret < 0)
			goto out;
		fs_path_free(ref->full_path);
		list_del(&ref->list);
		kfree(ref);
	}
	ret = 0;
out:
	btrfs_free_path(path);
	/* On error, free any refs that were recorded but not consumed. */
	if (ret)
		__free_recorded_refs(&deleted_refs);
	return ret;
}
6115 
/*
 * Handle an inode item reported by the tree comparison. Initializes the
 * per-inode state in @sctx (generation, size, mode, ...) and, depending
 * on @result, creates the inode, marks it deleted, or — when the inode
 * number was reused with a new generation — processes it as a delete of
 * the old inode followed by a create of the new one.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int changed_inode(struct send_ctx *sctx,
			 enum btrfs_compare_tree_result result)
{
	int ret = 0;
	struct btrfs_key *key = sctx->cmp_key;
	struct btrfs_inode_item *left_ii = NULL;
	struct btrfs_inode_item *right_ii = NULL;
	u64 left_gen = 0;
	u64 right_gen = 0;

	/* Reset all per-inode tracking state for the new current inode. */
	sctx->cur_ino = key->objectid;
	sctx->cur_inode_new_gen = 0;
	sctx->cur_inode_last_extent = (u64)-1;
	sctx->cur_inode_next_write_offset = 0;
	sctx->ignore_cur_inode = false;

	/*
	 * Set send_progress to current inode. This will tell all get_cur_xxx
	 * functions that the current inode's refs are not updated yet. Later,
	 * when process_recorded_refs is finished, it is set to cur_ino + 1.
	 */
	sctx->send_progress = sctx->cur_ino;

	if (result == BTRFS_COMPARE_TREE_NEW ||
	    result == BTRFS_COMPARE_TREE_CHANGED) {
		left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
				sctx->left_path->slots[0],
				struct btrfs_inode_item);
		left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
				left_ii);
	} else {
		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
				sctx->right_path->slots[0],
				struct btrfs_inode_item);
		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
				right_ii);
	}
	if (result == BTRFS_COMPARE_TREE_CHANGED) {
		/* For CHANGED the branch above read the left side only. */
		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
				sctx->right_path->slots[0],
				struct btrfs_inode_item);

		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
				right_ii);

		/*
		 * The cur_ino = root dir case is special here. We can't treat
		 * the inode as deleted+reused because it would generate a
		 * stream that tries to delete/mkdir the root dir.
		 */
		if (left_gen != right_gen &&
		    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
			sctx->cur_inode_new_gen = 1;
	}

	/*
	 * Normally we do not find inodes with a link count of zero (orphans)
	 * because the most common case is to create a snapshot and use it
	 * for a send operation. However other less common use cases involve
	 * using a subvolume and send it after turning it to RO mode just
	 * after deleting all hard links of a file while holding an open
	 * file descriptor against it or turning a RO snapshot into RW mode,
	 * keep an open file descriptor against a file, delete it and then
	 * turn the snapshot back to RO mode before using it for a send
	 * operation. So if we find such cases, ignore the inode and all its
	 * items completely if it's a new inode, or if it's a changed inode
	 * make sure all its previous paths (from the parent snapshot) are all
	 * unlinked and all other the inode items are ignored.
	 */
	if (result == BTRFS_COMPARE_TREE_NEW ||
	    result == BTRFS_COMPARE_TREE_CHANGED) {
		u32 nlinks;

		nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
		if (nlinks == 0) {
			sctx->ignore_cur_inode = true;
			if (result == BTRFS_COMPARE_TREE_CHANGED)
				ret = btrfs_unlink_all_paths(sctx);
			goto out;
		}
	}

	if (result == BTRFS_COMPARE_TREE_NEW) {
		sctx->cur_inode_gen = left_gen;
		sctx->cur_inode_new = 1;
		sctx->cur_inode_deleted = 0;
		sctx->cur_inode_size = btrfs_inode_size(
				sctx->left_path->nodes[0], left_ii);
		sctx->cur_inode_mode = btrfs_inode_mode(
				sctx->left_path->nodes[0], left_ii);
		sctx->cur_inode_rdev = btrfs_inode_rdev(
				sctx->left_path->nodes[0], left_ii);
		/* The root directory always exists on the receiving side. */
		if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
			ret = send_create_inode_if_needed(sctx);
	} else if (result == BTRFS_COMPARE_TREE_DELETED) {
		sctx->cur_inode_gen = right_gen;
		sctx->cur_inode_new = 0;
		sctx->cur_inode_deleted = 1;
		sctx->cur_inode_size = btrfs_inode_size(
				sctx->right_path->nodes[0], right_ii);
		sctx->cur_inode_mode = btrfs_inode_mode(
				sctx->right_path->nodes[0], right_ii);
	} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
		/*
		 * We need to do some special handling in case the inode was
		 * reported as changed with a changed generation number. This
		 * means that the original inode was deleted and new inode
		 * reused the same inum. So we have to treat the old inode as
		 * deleted and the new one as new.
		 */
		if (sctx->cur_inode_new_gen) {
			/*
			 * First, process the inode as if it was deleted.
			 */
			sctx->cur_inode_gen = right_gen;
			sctx->cur_inode_new = 0;
			sctx->cur_inode_deleted = 1;
			sctx->cur_inode_size = btrfs_inode_size(
					sctx->right_path->nodes[0], right_ii);
			sctx->cur_inode_mode = btrfs_inode_mode(
					sctx->right_path->nodes[0], right_ii);
			ret = process_all_refs(sctx,
					BTRFS_COMPARE_TREE_DELETED);
			if (ret < 0)
				goto out;

			/*
			 * Now process the inode as if it was new.
			 */
			sctx->cur_inode_gen = left_gen;
			sctx->cur_inode_new = 1;
			sctx->cur_inode_deleted = 0;
			sctx->cur_inode_size = btrfs_inode_size(
					sctx->left_path->nodes[0], left_ii);
			sctx->cur_inode_mode = btrfs_inode_mode(
					sctx->left_path->nodes[0], left_ii);
			sctx->cur_inode_rdev = btrfs_inode_rdev(
					sctx->left_path->nodes[0], left_ii);
			ret = send_create_inode_if_needed(sctx);
			if (ret < 0)
				goto out;

			ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
			if (ret < 0)
				goto out;
			/*
			 * Advance send_progress now as we did not get into
			 * process_recorded_refs_if_needed in the new_gen case.
			 */
			sctx->send_progress = sctx->cur_ino + 1;

			/*
			 * Now process all extents and xattrs of the inode as if
			 * they were all new.
			 */
			ret = process_all_extents(sctx);
			if (ret < 0)
				goto out;
			ret = process_all_new_xattrs(sctx);
			if (ret < 0)
				goto out;
		} else {
			sctx->cur_inode_gen = left_gen;
			sctx->cur_inode_new = 0;
			sctx->cur_inode_new_gen = 0;
			sctx->cur_inode_deleted = 0;
			sctx->cur_inode_size = btrfs_inode_size(
					sctx->left_path->nodes[0], left_ii);
			sctx->cur_inode_mode = btrfs_inode_mode(
					sctx->left_path->nodes[0], left_ii);
		}
	}

out:
	return ret;
}
6292 
6293 /*
6294  * We have to process new refs before deleted refs, but compare_trees gives us
6295  * the new and deleted refs mixed. To fix this, we record the new/deleted refs
6296  * first and later process them in process_recorded_refs.
6297  * For the cur_inode_new_gen case, we skip recording completely because
6298  * changed_inode did already initiate processing of refs. The reason for this is
6299  * that in this case, compare_tree actually compares the refs of 2 different
6300  * inodes. To fix this, process_all_refs is used in changed_inode to handle all
6301  * refs of the right tree as deleted and all refs of the left tree as new.
6302  */
6303 static int changed_ref(struct send_ctx *sctx,
6304                        enum btrfs_compare_tree_result result)
6305 {
6306         int ret = 0;
6307 
6308         if (sctx->cur_ino != sctx->cmp_key->objectid) {
6309                 inconsistent_snapshot_error(sctx, result, "reference");
6310                 return -EIO;
6311         }
6312 
6313         if (!sctx->cur_inode_new_gen &&
6314             sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
6315                 if (result == BTRFS_COMPARE_TREE_NEW)
6316                         ret = record_new_ref(sctx);
6317                 else if (result == BTRFS_COMPARE_TREE_DELETED)
6318                         ret = record_deleted_ref(sctx);
6319                 else if (result == BTRFS_COMPARE_TREE_CHANGED)
6320                         ret = record_changed_ref(sctx);
6321         }
6322 
6323         return ret;
6324 }
6325 
6326 /*
6327  * Process new/deleted/changed xattrs. We skip processing in the
6328  * cur_inode_new_gen case because changed_inode did already initiate processing
6329  * of xattrs. The reason is the same as in changed_ref
6330  */
6331 static int changed_xattr(struct send_ctx *sctx,
6332                          enum btrfs_compare_tree_result result)
6333 {
6334         int ret = 0;
6335 
6336         if (sctx->cur_ino != sctx->cmp_key->objectid) {
6337                 inconsistent_snapshot_error(sctx, result, "xattr");
6338                 return -EIO;
6339         }
6340 
6341         if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
6342                 if (result == BTRFS_COMPARE_TREE_NEW)
6343                         ret = process_new_xattr(sctx);
6344                 else if (result == BTRFS_COMPARE_TREE_DELETED)
6345                         ret = process_deleted_xattr(sctx);
6346                 else if (result == BTRFS_COMPARE_TREE_CHANGED)
6347                         ret = process_changed_xattr(sctx);
6348         }
6349 
6350         return ret;
6351 }
6352 
6353 /*
6354  * Process new/deleted/changed extents. We skip processing in the
6355  * cur_inode_new_gen case because changed_inode did already initiate processing
6356  * of extents. The reason is the same as in changed_ref
6357  */
6358 static int changed_extent(struct send_ctx *sctx,
6359                           enum btrfs_compare_tree_result result)
6360 {
6361         int ret = 0;
6362 
6363         /*
6364          * We have found an extent item that changed without the inode item
6365          * having changed. This can happen either after relocation (where the
6366          * disk_bytenr of an extent item is replaced at
6367          * relocation.c:replace_file_extents()) or after deduplication into a
6368          * file in both the parent and send snapshots (where an extent item can
6369          * get modified or replaced with a new one). Note that deduplication
6370          * updates the inode item, but it only changes the iversion (sequence
6371          * field in the inode item) of the inode, so if a file is deduplicated
6372          * the same amount of times in both the parent and send snapshots, its
6373          * iversion becames the same in both snapshots, whence the inode item is
6374          * the same on both snapshots.
6375          */
6376         if (sctx->cur_ino != sctx->cmp_key->objectid)
6377                 return 0;
6378 
6379         if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
6380                 if (result != BTRFS_COMPARE_TREE_DELETED)
6381                         ret = process_extent(sctx, sctx->left_path,
6382                                         sctx->cmp_key);
6383         }
6384 
6385         return ret;
6386 }
6387 
6388 static int dir_changed(struct send_ctx *sctx, u64 dir)
6389 {
6390         u64 orig_gen, new_gen;
6391         int ret;
6392 
6393         ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
6394                              NULL, NULL);
6395         if (ret)
6396                 return ret;
6397 
6398         ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
6399                              NULL, NULL, NULL);
6400         if (ret)
6401                 return ret;
6402 
6403         return (orig_gen != new_gen) ? 1 : 0;
6404 }
6405 
/*
 * For an inode ref/extref item that compared as identical between the two
 * trees, check whether any parent directory it references was deleted and
 * re-created (generation changed) between the snapshots — which means the
 * refs still have to be reprocessed despite the items being equal.
 *
 * Returns 1 if at least one referenced parent directory changed
 * generation, 0 if none did, or a negative errno on failure.
 */
static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
			struct btrfs_key *key)
{
	struct btrfs_inode_extref *extref;
	struct extent_buffer *leaf;
	u64 dirid = 0, last_dirid = 0;
	unsigned long ptr;
	u32 item_size;
	u32 cur_offset = 0;
	int ref_name_len;
	int ret = 0;

	/* Easy case, just check this one dirid */
	if (key->type == BTRFS_INODE_REF_KEY) {
		dirid = key->offset;

		ret = dir_changed(sctx, dirid);
		goto out;
	}

	/*
	 * Extref case: the item packs multiple (parent, name) entries
	 * back to back; walk them and check each distinct parent dir.
	 */
	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	while (cur_offset < item_size) {
		extref = (struct btrfs_inode_extref *)(ptr +
						       cur_offset);
		dirid = btrfs_inode_extref_parent(leaf, extref);
		ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
		cur_offset += ref_name_len + sizeof(*extref);
		/* Skip consecutive entries for the same parent dir. */
		if (dirid == last_dirid)
			continue;
		ret = dir_changed(sctx, dirid);
		if (ret)
			break;
		last_dirid = dirid;
	}
out:
	return ret;
}
6445 
6446 /*
6447  * Updates compare related fields in sctx and simply forwards to the actual
6448  * changed_xxx functions.
6449  */
6450 static int changed_cb(struct btrfs_path *left_path,
6451                       struct btrfs_path *right_path,
6452                       struct btrfs_key *key,
6453                       enum btrfs_compare_tree_result result,
6454                       void *ctx)
6455 {
6456         int ret = 0;
6457         struct send_ctx *sctx = ctx;
6458 
6459         if (result == BTRFS_COMPARE_TREE_SAME) {
6460                 if (key->type == BTRFS_INODE_REF_KEY ||
6461                     key->type == BTRFS_INODE_EXTREF_KEY) {
6462                         ret = compare_refs(sctx, left_path, key);
6463                         if (!ret)
6464                                 return 0;
6465                         if (ret < 0)
6466                                 return ret;
6467                 } else if (key->type == BTRFS_EXTENT_DATA_KEY) {
6468                         return maybe_send_hole(sctx, left_path, key);
6469                 } else {
6470                         return 0;
6471                 }
6472                 result = BTRFS_COMPARE_TREE_CHANGED;
6473                 ret = 0;
6474         }
6475 
6476         sctx->left_path = left_path;
6477         sctx->right_path = right_path;
6478         sctx->cmp_key = key;
6479 
6480         ret = finish_inode_if_needed(sctx, 0);
6481         if (ret < 0)
6482                 goto out;
6483 
6484         /* Ignore non-FS objects */
6485         if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
6486             key->objectid == BTRFS_FREE_SPACE_OBJECTID)
6487                 goto out;
6488 
6489         if (key->type == BTRFS_INODE_ITEM_KEY) {
6490                 ret = changed_inode(sctx, result);
6491         } else if (!sctx->ignore_cur_inode) {
6492                 if (key->type == BTRFS_INODE_REF_KEY ||
6493                     key->type == BTRFS_INODE_EXTREF_KEY)
6494                         ret = changed_ref(sctx, result);
6495                 else if (key->type == BTRFS_XATTR_ITEM_KEY)
6496                         ret = changed_xattr(sctx, result);
6497                 else if (key->type == BTRFS_EXTENT_DATA_KEY)
6498                         ret = changed_extent(sctx, result);
6499         }
6500 
6501 out:
6502         return ret;
6503 }
6504 
6505 static int full_send_tree(struct send_ctx *sctx)
6506 {
6507         int ret;
6508         struct btrfs_root *send_root = sctx->send_root;
6509         struct btrfs_key key;
6510         struct btrfs_path *path;
6511         struct extent_buffer *eb;
6512         int slot;
6513 
6514         path = alloc_path_for_send();
6515         if (!path)
6516                 return -ENOMEM;
6517 
6518         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6519         key.type = BTRFS_INODE_ITEM_KEY;
6520         key.offset = 0;
6521 
6522         ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
6523         if (ret < 0)
6524                 goto out;
6525         if (ret)
6526                 goto out_finish;
6527 
6528         while (1) {
6529                 eb = path->nodes[0];
6530                 slot = path->slots[0];
6531                 btrfs_item_key_to_cpu(eb, &key, slot);
6532 
6533                 ret = changed_cb(path, NULL, &key,
6534                                  BTRFS_COMPARE_TREE_NEW, sctx);
6535                 if (ret < 0)
6536                         goto out;
6537 
6538                 ret = btrfs_next_item(send_root, path);
6539                 if (ret < 0)
6540                         goto out;
6541                 if (ret) {
6542                         ret  = 0;
6543                         break;
6544                 }
6545         }
6546 
6547 out_finish:
6548         ret = finish_inode_if_needed(sctx, 1);
6549 
6550 out:
6551         btrfs_free_path(path);
6552         return ret;
6553 }
6554 
6555 static int tree_move_down(struct btrfs_path *path, int *level)
6556 {
6557         struct extent_buffer *eb;
6558 
6559         BUG_ON(*level == 0);
6560         eb = btrfs_read_node_slot(path->nodes[*level], path->slots[*level]);
6561         if (IS_ERR(eb))
6562                 return PTR_ERR(eb);
6563 
6564         path->nodes[*level - 1] = eb;
6565         path->slots[*level - 1] = 0;
6566         (*level)--;
6567         return 0;
6568 }
6569 
/*
 * Advance @path at @level to the next slot, climbing towards @root_level
 * whenever the current node is exhausted.
 *
 * Returns 0 if only a "next slot" move happened, 1 if at least one level
 * was climbed, and -1 when the root level itself is exhausted (the whole
 * tree has been walked).
 */
static int tree_move_next_or_upnext(struct btrfs_path *path,
				    int *level, int root_level)
{
	int ret = 0;
	int nritems;
	nritems = btrfs_header_nritems(path->nodes[*level]);

	path->slots[*level]++;

	while (path->slots[*level] >= nritems) {
		if (*level == root_level)
			return -1;

		/* move upnext */
		path->slots[*level] = 0;
		/* Drop the exhausted child before moving up to its parent. */
		free_extent_buffer(path->nodes[*level]);
		path->nodes[*level] = NULL;
		(*level)++;
		path->slots[*level]++;

		/* Re-read the item count at the new (parent) level. */
		nritems = btrfs_header_nritems(path->nodes[*level]);
		ret = 1;
	}
	return ret;
}
6595 
6596 /*
6597  * Returns 1 if it had to move up and next. 0 is returned if it moved only next
6598  * or down.
6599  */
6600 static int tree_advance(struct btrfs_path *path,
6601                         int *level, int root_level,
6602                         int allow_down,
6603                         struct btrfs_key *key)
6604 {
6605         int ret;
6606 
6607         if (*level == 0 || !allow_down) {
6608                 ret = tree_move_next_or_upnext(path, level, root_level);
6609         } else {
6610                 ret = tree_move_down(path, level);
6611         }
6612         if (ret >= 0) {
6613                 if (*level == 0)
6614                         btrfs_item_key_to_cpu(path->nodes[*level], key,
6615                                         path->slots[*level]);
6616                 else
6617                         btrfs_node_key_to_cpu(path->nodes[*level], key,
6618                                         path->slots[*level]);
6619         }
6620         return ret;
6621 }
6622 
6623 static int tree_compare_item(struct btrfs_path *left_path,
6624                              struct btrfs_path *right_path,
6625                              char *tmp_buf)
6626 {
6627         int cmp;
6628         int len1, len2;
6629         unsigned long off1, off2;
6630 
6631         len1 = btrfs_item_size_nr(left_path->nodes[0], left_path->slots[0]);
6632         len2 = btrfs_item_size_nr(right_path->nodes[0], right_path->slots[0]);
6633         if (len1 != len2)
6634                 return 1;
6635 
6636         off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]);
6637         off2 = btrfs_item_ptr_offset(right_path->nodes[0],
6638                                 right_path->slots[0]);
6639 
6640         read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1);
6641 
6642         cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1);
6643         if (cmp)
6644                 return 1;
6645         return 0;
6646 }
6647 
6648 /*
6649  * This function compares two trees and calls the provided callback for
6650  * every changed/new/deleted item it finds.
6651  * If shared tree blocks are encountered, whole subtrees are skipped, making
6652  * the compare pretty fast on snapshotted subvolumes.
6653  *
6654  * This currently works on commit roots only. As commit roots are read only,
6655  * we don't do any locking. The commit roots are protected with transactions.
6656  * Transactions are ended and rejoined when a commit is tried in between.
6657  *
6658  * This function checks for modifications done to the trees while comparing.
6659  * If it detects a change, it aborts immediately.
6660  */
static int btrfs_compare_trees(struct btrfs_root *left_root,
			struct btrfs_root *right_root,
			btrfs_changed_cb_t changed_cb, void *ctx)
{
	struct btrfs_fs_info *fs_info = left_root->fs_info;
	int ret;
	int cmp;
	struct btrfs_path *left_path = NULL;
	struct btrfs_path *right_path = NULL;
	struct btrfs_key left_key;
	struct btrfs_key right_key;
	char *tmp_buf = NULL;
	int left_root_level;
	int right_root_level;
	int left_level;
	int right_level;
	int left_end_reached;
	int right_end_reached;
	int advance_left;
	int advance_right;
	u64 left_blockptr;
	u64 right_blockptr;
	u64 left_gen;
	u64 right_gen;

	left_path = btrfs_alloc_path();
	if (!left_path) {
		ret = -ENOMEM;
		goto out;
	}
	right_path = btrfs_alloc_path();
	if (!right_path) {
		ret = -ENOMEM;
		goto out;
	}

	/* Scratch space for tree_compare_item(); big enough for any item. */
	tmp_buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
	if (!tmp_buf) {
		ret = -ENOMEM;
		goto out;
	}

	/* Commit roots are read-only, so no tree locking is needed. */
	left_path->search_commit_root = 1;
	left_path->skip_locking = 1;
	right_path->search_commit_root = 1;
	right_path->skip_locking = 1;

	/*
	 * Strategy: Go to the first items of both trees. Then do
	 *
	 * If both trees are at level 0
	 *   Compare keys of current items
	 *     If left < right treat left item as new, advance left tree
	 *       and repeat
	 *     If left > right treat right item as deleted, advance right tree
	 *       and repeat
	 *     If left == right do deep compare of items, treat as changed if
	 *       needed, advance both trees and repeat
	 * If both trees are at the same level but not at level 0
	 *   Compare keys of current nodes/leafs
	 *     If left < right advance left tree and repeat
	 *     If left > right advance right tree and repeat
	 *     If left == right compare blockptrs of the next nodes/leafs
	 *       If they match advance both trees but stay at the same level
	 *         and repeat
	 *       If they don't match advance both trees while allowing to go
	 *         deeper and repeat
	 * If tree levels are different
	 *   Advance the tree that needs it and repeat
	 *
	 * Advancing a tree means:
	 *   If we are at level 0, try to go to the next slot. If that's not
	 *   possible, go one level up and repeat. Stop when we found a level
	 *   where we could go to the next slot. We may at this point be on a
	 *   node or a leaf.
	 *
	 *   If we are not at level 0 and not on shared tree blocks, go one
	 *   level deeper.
	 *
	 *   If we are not at level 0 and on shared tree blocks, go one slot to
	 *   the right if possible or go up and right.
	 */

	/*
	 * Clone both commit roots under commit_root_sem so a transaction
	 * commit can't swap them out from under us mid-setup.
	 */
	down_read(&fs_info->commit_root_sem);
	left_level = btrfs_header_level(left_root->commit_root);
	left_root_level = left_level;
	left_path->nodes[left_level] =
			btrfs_clone_extent_buffer(left_root->commit_root);
	if (!left_path->nodes[left_level]) {
		up_read(&fs_info->commit_root_sem);
		ret = -ENOMEM;
		goto out;
	}

	right_level = btrfs_header_level(right_root->commit_root);
	right_root_level = right_level;
	right_path->nodes[right_level] =
			btrfs_clone_extent_buffer(right_root->commit_root);
	if (!right_path->nodes[right_level]) {
		up_read(&fs_info->commit_root_sem);
		ret = -ENOMEM;
		goto out;
	}
	up_read(&fs_info->commit_root_sem);

	/* Seed the starting keys from slot 0 of each root. */
	if (left_level == 0)
		btrfs_item_key_to_cpu(left_path->nodes[left_level],
				&left_key, left_path->slots[left_level]);
	else
		btrfs_node_key_to_cpu(left_path->nodes[left_level],
				&left_key, left_path->slots[left_level]);
	if (right_level == 0)
		btrfs_item_key_to_cpu(right_path->nodes[right_level],
				&right_key, right_path->slots[right_level]);
	else
		btrfs_node_key_to_cpu(right_path->nodes[right_level],
				&right_key, right_path->slots[right_level]);

	left_end_reached = right_end_reached = 0;
	advance_left = advance_right = 0;

	while (1) {
		cond_resched();
		if (advance_left && !left_end_reached) {
			ret = tree_advance(left_path, &left_level,
					left_root_level,
					advance_left != ADVANCE_ONLY_NEXT,
					&left_key);
			/* -1 from tree_advance means end of tree, not error */
			if (ret == -1)
				left_end_reached = ADVANCE;
			else if (ret < 0)
				goto out;
			advance_left = 0;
		}
		if (advance_right && !right_end_reached) {
			ret = tree_advance(right_path, &right_level,
					right_root_level,
					advance_right != ADVANCE_ONLY_NEXT,
					&right_key);
			if (ret == -1)
				right_end_reached = ADVANCE;
			else if (ret < 0)
				goto out;
			advance_right = 0;
		}

		if (left_end_reached && right_end_reached) {
			ret = 0;
			goto out;
		} else if (left_end_reached) {
			/* Left exhausted: remaining right items were deleted. */
			if (right_level == 0) {
				ret = changed_cb(left_path, right_path,
						&right_key,
						BTRFS_COMPARE_TREE_DELETED,
						ctx);
				if (ret < 0)
					goto out;
			}
			advance_right = ADVANCE;
			continue;
		} else if (right_end_reached) {
			/* Right exhausted: remaining left items are new. */
			if (left_level == 0) {
				ret = changed_cb(left_path, right_path,
						&left_key,
						BTRFS_COMPARE_TREE_NEW,
						ctx);
				if (ret < 0)
					goto out;
			}
			advance_left = ADVANCE;
			continue;
		}

		if (left_level == 0 && right_level == 0) {
			/* Both on leaves: merge-compare the item streams. */
			cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
			if (cmp < 0) {
				ret = changed_cb(left_path, right_path,
						&left_key,
						BTRFS_COMPARE_TREE_NEW,
						ctx);
				if (ret < 0)
					goto out;
				advance_left = ADVANCE;
			} else if (cmp > 0) {
				ret = changed_cb(left_path, right_path,
						&right_key,
						BTRFS_COMPARE_TREE_DELETED,
						ctx);
				if (ret < 0)
					goto out;
				advance_right = ADVANCE;
			} else {
				enum btrfs_compare_tree_result result;

				WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
				ret = tree_compare_item(left_path, right_path,
							tmp_buf);
				if (ret)
					result = BTRFS_COMPARE_TREE_CHANGED;
				else
					result = BTRFS_COMPARE_TREE_SAME;
				ret = changed_cb(left_path, right_path,
						 &left_key, result, ctx);
				if (ret < 0)
					goto out;
				advance_left = ADVANCE;
				advance_right = ADVANCE;
			}
		} else if (left_level == right_level) {
			cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
			if (cmp < 0) {
				advance_left = ADVANCE;
			} else if (cmp > 0) {
				advance_right = ADVANCE;
			} else {
				left_blockptr = btrfs_node_blockptr(
						left_path->nodes[left_level],
						left_path->slots[left_level]);
				right_blockptr = btrfs_node_blockptr(
						right_path->nodes[right_level],
						right_path->slots[right_level]);
				left_gen = btrfs_node_ptr_generation(
						left_path->nodes[left_level],
						left_path->slots[left_level]);
				right_gen = btrfs_node_ptr_generation(
						right_path->nodes[right_level],
						right_path->slots[right_level]);
				if (left_blockptr == right_blockptr &&
				    left_gen == right_gen) {
					/*
					 * As we're on a shared block, don't
					 * allow to go deeper.
					 */
					advance_left = ADVANCE_ONLY_NEXT;
					advance_right = ADVANCE_ONLY_NEXT;
				} else {
					advance_left = ADVANCE;
					advance_right = ADVANCE;
				}
			}
		} else if (left_level < right_level) {
			/* Different levels: catch the deeper side up first. */
			advance_right = ADVANCE;
		} else {
			advance_left = ADVANCE;
		}
	}

out:
	btrfs_free_path(left_path);
	btrfs_free_path(right_path);
	kvfree(tmp_buf);
	return ret;
}
6914 
6915 static int send_subvol(struct send_ctx *sctx)
6916 {
6917         int ret;
6918 
6919         if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
6920                 ret = send_header(sctx);
6921                 if (ret < 0)
6922                         goto out;
6923         }
6924 
6925         ret = send_subvol_begin(sctx);
6926         if (ret < 0)
6927                 goto out;
6928 
6929         if (sctx->parent_root) {
6930                 ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
6931                                 changed_cb, sctx);
6932                 if (ret < 0)
6933                         goto out;
6934                 ret = finish_inode_if_needed(sctx, 1);
6935                 if (ret < 0)
6936                         goto out;
6937         } else {
6938                 ret = full_send_tree(sctx);
6939                 if (ret < 0)
6940                         goto out;
6941         }
6942 
6943 out:
6944         free_recorded_refs(sctx);
6945         return ret;
6946 }
6947 
6948 /*
6949  * If orphan cleanup did remove any orphans from a root, it means the tree
6950  * was modified and therefore the commit root is not the same as the current
6951  * root anymore. This is a problem, because send uses the commit root and
6952  * therefore can see inode items that don't exist in the current root anymore,
6953  * and for example make calls to btrfs_iget, which will do tree lookups based
6954  * on the current root and not on the commit root. Those lookups will fail,
6955  * returning a -ESTALE error, and making send fail with that error. So make
6956  * sure a send does not see any orphans we have just removed, and that it will
6957  * see the same inodes regardless of whether a transaction commit happened
6958  * before it started (meaning that the commit root will be the same as the
6959  * current root) or not.
6960  */
6961 static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
6962 {
6963         int i;
6964         struct btrfs_trans_handle *trans = NULL;
6965 
6966 again:
6967         if (sctx->parent_root &&
6968             sctx->parent_root->node != sctx->parent_root->commit_root)
6969                 goto commit_trans;
6970 
6971         for (i = 0; i < sctx->clone_roots_cnt; i++)
6972                 if (sctx->clone_roots[i].root->node !=
6973                     sctx->clone_roots[i].root->commit_root)
6974                         goto commit_trans;
6975 
6976         if (trans)
6977                 return btrfs_end_transaction(trans);
6978 
6979         return 0;
6980 
6981 commit_trans:
6982         /* Use any root, all fs roots will get their commit roots updated. */
6983         if (!trans) {
6984                 trans = btrfs_join_transaction(sctx->send_root);
6985                 if (IS_ERR(trans))
6986                         return PTR_ERR(trans);
6987                 goto again;
6988         }
6989 
6990         return btrfs_commit_transaction(trans);
6991 }
6992 
6993 /*
6994  * Make sure any existing dellaloc is flushed for any root used by a send
6995  * operation so that we do not miss any data and we do not race with writeback
6996  * finishing and changing a tree while send is using the tree. This could
6997  * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
6998  * a send operation then uses the subvolume.
6999  * After flushing delalloc ensure_commit_roots_uptodate() must be called.
7000  */
7001 static int flush_delalloc_roots(struct send_ctx *sctx)
7002 {
7003         struct btrfs_root *root = sctx->parent_root;
7004         int ret;
7005         int i;
7006 
7007         if (root) {
7008                 ret = btrfs_start_delalloc_snapshot(root);
7009                 if (ret)
7010                         return ret;
7011                 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
7012         }
7013 
7014         for (i = 0; i < sctx->clone_roots_cnt; i++) {
7015                 root = sctx->clone_roots[i].root;
7016                 ret = btrfs_start_delalloc_snapshot(root);
7017                 if (ret)
7018                         return ret;
7019                 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
7020         }
7021 
7022         return 0;
7023 }
7024 
/*
 * Drop one reference from the root's send_in_progress counter, taken under
 * root_item_lock.  A negative result means an unbalanced inc/dec somewhere
 * and is only reported, not repaired.
 */
static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
{
	spin_lock(&root->root_item_lock);
	root->send_in_progress--;
	/*
	 * Not much left to do, we don't know why it's unbalanced and
	 * can't blindly reset it to 0.
	 */
	if (root->send_in_progress < 0)
		btrfs_err(root->fs_info,
			  "send_in_progress unbalanced %d root %llu",
			  root->send_in_progress, root->root_key.objectid);
	spin_unlock(&root->root_item_lock);
}
7039 
/*
 * Rate-limited warning emitted when send is refused (-EAGAIN) because
 * deduplication is currently running on @root.
 */
static void dedupe_in_progress_warn(const struct btrfs_root *root)
{
	btrfs_warn_rl(root->fs_info,
"cannot use root %llu for send while deduplications on it are in progress (%d in progress)",
		      root->root_key.objectid, root->dedupe_in_progress);
}
7046 
7047 long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
7048 {
7049         int ret = 0;
7050         struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
7051         struct btrfs_fs_info *fs_info = send_root->fs_info;
7052         struct btrfs_root *clone_root;
7053         struct btrfs_key key;
7054         struct send_ctx *sctx = NULL;
7055         u32 i;
7056         u64 *clone_sources_tmp = NULL;
7057         int clone_sources_to_rollback = 0;
7058         unsigned alloc_size;
7059         int sort_clone_roots = 0;
7060         int index;
7061 
7062         if (!capable(CAP_SYS_ADMIN))
7063                 return -EPERM;
7064 
7065         /*
7066          * The subvolume must remain read-only during send, protect against
7067          * making it RW. This also protects against deletion.
7068          */
7069         spin_lock(&send_root->root_item_lock);
7070         if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) {
7071                 dedupe_in_progress_warn(send_root);
7072                 spin_unlock(&send_root->root_item_lock);
7073                 return -EAGAIN;
7074         }
7075         send_root->send_in_progress++;
7076         spin_unlock(&send_root->root_item_lock);
7077 
7078         /*
7079          * Userspace tools do the checks and warn the user if it's
7080          * not RO.
7081          */
7082         if (!btrfs_root_readonly(send_root)) {
7083                 ret = -EPERM;
7084                 goto out;
7085         }
7086 
7087         /*
7088          * Check that we don't overflow at later allocations, we request
7089          * clone_sources_count + 1 items, and compare to unsigned long inside
7090          * access_ok.
7091          */
7092         if (arg->clone_sources_count >
7093             ULONG_MAX / sizeof(struct clone_root) - 1) {
7094                 ret = -EINVAL;
7095                 goto out;
7096         }
7097 
7098         if (!access_ok(arg->clone_sources,
7099                         sizeof(*arg->clone_sources) *
7100                         arg->clone_sources_count)) {
7101                 ret = -EFAULT;
7102                 goto out;
7103         }
7104 
7105         if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
7106                 ret = -EINVAL;
7107                 goto out;
7108         }
7109 
7110         sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL);
7111         if (!sctx) {
7112                 ret = -ENOMEM;
7113                 goto out;
7114         }
7115 
7116         INIT_LIST_HEAD(&sctx->new_refs);
7117         INIT_LIST_HEAD(&sctx->deleted_refs);
7118         INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
7119         INIT_LIST_HEAD(&sctx->name_cache_list);
7120 
7121         sctx->flags = arg->flags;
7122 
7123         sctx->send_filp = fget(arg->send_fd);
7124         if (!sctx->send_filp) {
7125                 ret = -EBADF;
7126                 goto out;
7127         }
7128 
7129         sctx->send_root = send_root;
7130         /*
7131          * Unlikely but possible, if the subvolume is marked for deletion but
7132          * is slow to remove the directory entry, send can still be started
7133          */
7134         if (btrfs_root_dead(sctx->send_root)) {
7135                 ret = -EPERM;
7136                 goto out;
7137         }
7138 
7139         sctx->clone_roots_cnt = arg->clone_sources_count;
7140 
7141         sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
7142         sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
7143         if (!sctx->send_buf) {
7144                 ret = -ENOMEM;
7145                 goto out;
7146         }
7147 
7148         sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL);
7149         if (!sctx->read_buf) {
7150                 ret = -ENOMEM;
7151                 goto out;
7152         }
7153 
7154         sctx->pending_dir_moves = RB_ROOT;
7155         sctx->waiting_dir_moves = RB_ROOT;
7156         sctx->orphan_dirs = RB_ROOT;
7157 
7158         alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1);
7159 
7160         sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL);
7161         if (!sctx->clone_roots) {
7162                 ret = -ENOMEM;
7163                 goto out;
7164         }
7165 
7166         alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources);
7167 
7168         if (arg->clone_sources_count) {
7169                 clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL);
7170                 if (!clone_sources_tmp) {
7171                         ret = -ENOMEM;
7172                         goto out;
7173                 }
7174 
7175                 ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
7176                                 alloc_size);
7177                 if (ret) {
7178                         ret = -EFAULT;
7179                         goto out;
7180                 }
7181 
7182                 for (i = 0; i < arg->clone_sources_count; i++) {
7183                         key.objectid = clone_sources_tmp[i];
7184                         key.type = BTRFS_ROOT_ITEM_KEY;
7185                         key.offset = (u64)-1;
7186 
7187                         index = srcu_read_lock(&fs_info->subvol_srcu);
7188 
7189                         clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
7190                         if (IS_ERR(clone_root)) {
7191                                 srcu_read_unlock(&fs_info->subvol_srcu, index);
7192                                 ret = PTR_ERR(clone_root);
7193                                 goto out;
7194                         }
7195                         spin_lock(&clone_root->root_item_lock);
7196                         if (!btrfs_root_readonly(clone_root) ||
7197                             btrfs_root_dead(clone_root)) {
7198                                 spin_unlock(&clone_root->root_item_lock);
7199                                 srcu_read_unlock(&fs_info->subvol_srcu, index);
7200                                 ret = -EPERM;
7201                                 goto out;
7202                         }
7203                         if (clone_root->dedupe_in_progress) {
7204                                 dedupe_in_progress_warn(clone_root);
7205                                 spin_unlock(&clone_root->root_item_lock);
7206                                 srcu_read_unlock(&fs_info->subvol_srcu, index);
7207                                 ret = -EAGAIN;
7208                                 goto out;
7209                         }
7210                         clone_root->send_in_progress++;
7211                         spin_unlock(&clone_root->root_item_lock);
7212                         srcu_read_unlock(&fs_info->subvol_srcu, index);
7213 
7214                         sctx->clone_roots[i].root = clone_root;
7215                         clone_sources_to_rollback = i + 1;
7216                 }
7217                 kvfree(clone_sources_tmp);
7218                 clone_sources_tmp = NULL;
7219         }
7220 
7221         if (arg->parent_root) {
7222                 key.objectid = arg->parent_root;
7223                 key.type = BTRFS_ROOT_ITEM_KEY;
7224                 key.offset = (u64)-1;
7225 
7226                 index = srcu_read_lock(&fs_info->subvol_srcu);
7227 
7228                 sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
7229                 if (IS_ERR(sctx->parent_root)) {
7230                         srcu_read_unlock(&fs_info->subvol_srcu, index);
7231                         ret = PTR_ERR(sctx->parent_root);
7232                         goto out;
7233                 }
7234 
7235                 spin_lock(&sctx->parent_root->root_item_lock);
7236                 sctx->parent_root->send_in_progress++;
7237                 if (!btrfs_root_readonly(sctx->parent_root) ||
7238                                 btrfs_root_dead(sctx->parent_root)) {
7239                         spin_unlock(&sctx->parent_root->root_item_lock);
7240                         srcu_read_unlock(&fs_info->subvol_srcu, index);
7241                         ret = -EPERM;
7242                         goto out;
7243                 }
7244                 if (sctx->parent_root->dedupe_in_progress) {
7245                         dedupe_in_progress_warn(sctx->parent_root);
7246                         spin_unlock(&sctx->parent_root->root_item_lock);
7247                         srcu_read_unlock(&fs_info->subvol_srcu, index);
7248                         ret = -EAGAIN;
7249                         goto out;
7250                 }
7251                 spin_unlock(&sctx->parent_root->root_item_lock);
7252 
7253                 srcu_read_unlock(&fs_info->subvol_srcu, index);
7254         }
7255 
7256         /*
7257          * Clones from send_root are allowed, but only if the clone source
7258          * is behind the current send position. This is checked while searching
7259          * for possible clone sources.
7260          */
7261         sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
7262 
7263         /* We do a bsearch later */
7264         sort(sctx->clone_roots, sctx->clone_roots_cnt,
7265                         sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
7266                         NULL);
7267         sort_clone_roots = 1;
7268 
7269         ret = flush_delalloc_roots(sctx);
7270         if (ret)
7271                 goto out;
7272 
7273         ret = ensure_commit_roots_uptodate(sctx);
7274         if (ret)
7275                 goto out;
7276 
7277         mutex_lock(&fs_info->balance_mutex);
7278         if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
7279                 mutex_unlock(&fs_info->balance_mutex);
7280                 btrfs_warn_rl(fs_info,
7281                 "cannot run send because a balance operation is in progress");
7282                 ret = -EAGAIN;
7283                 goto out;
7284         }
7285         fs_info->send_in_progress++;
7286         mutex_unlock(&fs_info->balance_mutex);
7287 
7288         current->journal_info = BTRFS_SEND_TRANS_STUB;
7289         ret = send_subvol(sctx);
7290         current->journal_info = NULL;
7291         mutex_lock(&fs_info->balance_mutex);
7292         fs_info->send_in_progress--;
7293         mutex_unlock(&fs_info->balance_mutex);
7294         if (ret < 0)
7295                 goto out;
7296 
7297         if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
7298                 ret = begin_cmd(sctx, BTRFS_SEND_C_END);
7299                 if (ret < 0)
7300                         goto out;
7301                 ret = send_cmd(sctx);
7302                 if (ret < 0)
7303                         goto out;
7304         }
7305 
7306 out:
7307         WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
7308         while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
7309                 struct rb_node *n;
7310                 struct pending_dir_move *pm;
7311 
7312                 n = rb_first(&sctx->pending_dir_moves);
7313                 pm = rb_entry(n, struct pending_dir_move, node);
7314                 while (!list_empty(&pm->list)) {
7315                         struct pending_dir_move *pm2;
7316 
7317                         pm2 = list_first_entry(&pm->list,
7318                                                struct pending_dir_move, list);
7319                         free_pending_move(sctx, pm2);
7320                 }
7321                 free_pending_move(sctx, pm);
7322         }
7323 
7324         WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
7325         while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
7326                 struct rb_node *n;
7327                 struct waiting_dir_move *dm;
7328 
7329                 n = rb_first(&sctx->waiting_dir_moves);
7330                 dm = rb_entry(n, struct waiting_dir_move, node);
7331                 rb_erase(&dm->node, &sctx->waiting_dir_moves);
7332                 kfree(dm);
7333         }
7334 
7335         WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
7336         while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
7337                 struct rb_node *n;
7338                 struct orphan_dir_info *odi;
7339 
7340                 n = rb_first(&sctx->orphan_dirs);
7341                 odi = rb_entry(n, struct orphan_dir_info, node);
7342                 free_orphan_dir_info(sctx, odi);
7343         }
7344 
7345         if (sort_clone_roots) {
7346                 for (i = 0; i < sctx->clone_roots_cnt; i++)
7347                         btrfs_root_dec_send_in_progress(
7348                                         sctx->clone_roots[i].root);
7349         } else {
7350                 for (i = 0; sctx && i < clone_sources_to_rollback; i++)
7351                         btrfs_root_dec_send_in_progress(
7352                                         sctx->clone_roots[i].root);
7353 
7354                 btrfs_root_dec_send_in_progress(send_root);
7355         }
7356         if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
7357                 btrfs_root_dec_send_in_progress(sctx->parent_root);
7358 
7359         kvfree(clone_sources_tmp);
7360 
7361         if (sctx) {
7362                 if (sctx->send_filp)
7363                         fput(sctx->send_filp);
7364 
7365                 kvfree(sctx->clone_roots);
7366                 kvfree(sctx->send_buf);
7367                 kvfree(sctx->read_buf);
7368 
7369                 name_cache_free(sctx);
7370 
7371                 kfree(sctx);
7372         }
7373 
7374         return ret;
7375 }

/* [<][>][^][v][top][bottom][index][help] */