/*
 * Copyright (C) 2012 Alexander Block. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/bsearch.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sort.h>
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/radix-tree.h>
#include <linux/vmalloc.h>
#include <linux/string.h>

#include "send.h"
#include "backref.h"
#include "hash.h"
#include "locking.h"
#include "disk-io.h"
#include "btrfs_inode.h"
#include "transaction.h"

static int g_verbose = 0;

#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__)

/*
 * A fs_path is a helper to dynamically build path names with unknown size.
 * It reallocates the internal buffer on demand.
 * It allows fast adding of path elements on the right side (normal path) and
 * fast adding to the left side (reversed path). A reversed path can also be
 * unreversed if needed.
 */
struct fs_path {
	union {
		struct {
			char *start;
			char *end;

			char *buf;
			unsigned short buf_len:15;
			unsigned short reversed:1;
			char inline_buf[];
		};
		/*
		 * Average path length does not exceed 200 bytes, we'll have
		 * better packing in the slab and higher chance to satisfy
		 * an allocation later during send.
		 */
		char pad[256];
	};
};
#define FS_PATH_INLINE_SIZE \
	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))


/* reused for each extent */
struct clone_root {
	struct btrfs_root *root;
	u64 ino;
	u64 offset;

	u64 found_refs;
};

#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)

struct send_ctx {
	struct file *send_filp;
	loff_t send_off;
	char *send_buf;
	u32 send_size;
	u32 send_max_size;
	u64 total_send_size;
	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */

	struct btrfs_root *send_root;
	struct btrfs_root *parent_root;
	struct clone_root *clone_roots;
	int clone_roots_cnt;

	/* current state of the compare_tree call */
	struct btrfs_path *left_path;
	struct btrfs_path *right_path;
	struct btrfs_key *cmp_key;

	/*
	 * Info about the currently processed inode. In case of deleted inodes,
	 * these are the values from the deleted inode.
	 */
	u64 cur_ino;
	u64 cur_inode_gen;
	int cur_inode_new;
	int cur_inode_new_gen;
	int cur_inode_deleted;
	u64 cur_inode_size;
	u64 cur_inode_mode;
	u64 cur_inode_rdev;
	u64 cur_inode_last_extent;

	u64 send_progress;

	struct list_head new_refs;
	struct list_head deleted_refs;

	struct radix_tree_root name_cache;
	struct list_head name_cache_list;
	int name_cache_size;

	struct file_ra_state ra;

	char *read_buf;

	/*
	 * We process inodes by their increasing order, so if before an
	 * incremental send we reverse the parent/child relationship of
	 * directories such that a directory with a lower inode number was
	 * the parent of a directory with a higher inode number, and the one
	 * becoming the new parent got renamed too, we can't rename/move the
	 * directory with the lower inode number when we finish processing
	 * it - we must process the directory with the higher inode number
	 * first, then rename/move it and then rename/move the directory with
	 * the lower inode number. Example follows.
	 *
	 * Tree state when the first send was performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |
	 *         |
	 *         |-- c           (ino 259)
	 *         |   |-- d       (ino 260)
	 *         |
	 *         |-- c2          (ino 261)
	 *
	 * Tree state when the second (incremental) send is performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |-- c2          (ino 261)
	 *             |-- d2      (ino 260)
	 *                 |-- cc  (ino 259)
	 *
	 * The sequence of steps that led to the second state was:
	 *
	 * mv /a/b/c/d /a/b/c2/d2
	 * mv /a/b/c /a/b/c2/d2/cc
	 *
	 * "c" has the lower inode number, but we can't move it (2nd mv
	 * operation) before we move "d", which has the higher inode number.
	 *
	 * So we just memorize which move/rename operations must be performed
	 * later when their respective parent is processed and moved/renamed.
	 */

	/* Indexed by parent directory inode number. */
	struct rb_root pending_dir_moves;

	/*
	 * Reverse index, indexed by the inode number of a directory that
	 * is waiting for the move/rename of its immediate parent before its
	 * own move/rename can be performed.
	 */
	struct rb_root waiting_dir_moves;

	/*
	 * A directory that is going to be rm'ed might have a child directory
	 * which is in the pending directory moves index above. In this case,
	 * the directory can only be removed after the move/rename of its child
	 * is performed. Example:
	 *
	 * Parent snapshot:
	 *
	 * .                       (ino 256)
	 * |-- a/                  (ino 257)
	 *     |-- b/              (ino 258)
	 *         |-- c/          (ino 259)
	 *         |   |-- x/      (ino 260)
	 *         |
	 *         |-- y/          (ino 261)
	 *
	 * Send snapshot:
	 *
	 * .                       (ino 256)
	 * |-- a/                  (ino 257)
	 *     |-- b/              (ino 258)
	 *         |-- YY/         (ino 261)
	 *              |-- x/     (ino 260)
	 *
	 * Sequence of steps that led to the send snapshot:
	 * rm -f /a/b/c/foo.txt
	 * mv /a/b/y /a/b/YY
	 * mv /a/b/c/x /a/b/YY
	 * rmdir /a/b/c
	 *
	 * When the child is processed, its move/rename is delayed until its
	 * parent is processed (as explained above), but all other operations
	 * like update utimes, chown, chgrp, etc, are performed and the paths
	 * that it uses for those operations must use the orphanized name of
	 * its parent (the directory we're going to rm later), so we need to
	 * memorize that name.
	 *
	 * Indexed by the inode number of the directory to be deleted.
	 */
	struct rb_root orphan_dirs;
};

struct pending_dir_move {
	struct rb_node node;
	struct list_head list;
	u64 parent_ino;
	u64 ino;
	u64 gen;
	bool is_orphan;
	struct list_head update_refs;
};

struct waiting_dir_move {
	struct rb_node node;
	u64 ino;
	/*
	 * There might be some directory that could not be removed because it
	 * was waiting for this directory inode to be moved first. Therefore
	 * after this directory is moved, we can try to rmdir the inode
	 * rmdir_ino.
	 */
	u64 rmdir_ino;
	bool orphanized;
};

struct orphan_dir_info {
	struct rb_node node;
	u64 ino;
	u64 gen;
};

struct name_cache_entry {
	struct list_head list;
	/*
	 * The radix_tree has only 32bit entries but we need to handle 64bit
	 * inums. We use the lower 32bit of the 64bit inum to store it in the
	 * tree. If more than one inum would fall into the same entry, we use
	 * radix_list to store the additional entries. radix_list is also used
	 * to store entries where two entries have the same inum but different
	 * generations.
	 */
	struct list_head radix_list;
	u64 ino;
	u64 gen;
	u64 parent_ino;
	u64 parent_gen;
	int ret;
	int need_later_update;
	int name_len;
	char name[];
};

static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);

static struct waiting_dir_move *
get_waiting_dir_move(struct send_ctx *sctx, u64 ino);

static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);

static int need_send_hole(struct send_ctx *sctx)
{
	return (sctx->parent_root && !sctx->cur_inode_new &&
		!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
		S_ISREG(sctx->cur_inode_mode));
}

static void fs_path_reset(struct fs_path *p)
{
	if (p->reversed) {
		p->start = p->buf + p->buf_len - 1;
		p->end = p->start;
		*p->start = 0;
	} else {
		p->start = p->buf;
		p->end = p->start;
		*p->start = 0;
	}
}

static struct fs_path *fs_path_alloc(void)
{
	struct fs_path *p;

	p = kmalloc(sizeof(*p), GFP_NOFS);
	if (!p)
		return NULL;
	p->reversed = 0;
	p->buf = p->inline_buf;
	p->buf_len = FS_PATH_INLINE_SIZE;
	fs_path_reset(p);
	return p;
}

static struct fs_path *fs_path_alloc_reversed(void)
{
	struct fs_path *p;

	p = fs_path_alloc();
	if (!p)
		return NULL;
	p->reversed = 1;
	fs_path_reset(p);
	return p;
}

static void fs_path_free(struct fs_path *p)
{
	if (!p)
		return;
	if (p->buf != p->inline_buf)
		kfree(p->buf);
	kfree(p);
}

static int fs_path_len(struct fs_path *p)
{
	return p->end - p->start;
}

static int fs_path_ensure_buf(struct fs_path *p, int len)
{
	char *tmp_buf;
	int path_len;
	int old_buf_len;

	len++;

	if (p->buf_len >= len)
		return 0;

	if (len > PATH_MAX) {
		WARN_ON(1);
		return -ENOMEM;
	}

	path_len = p->end - p->start;
	old_buf_len = p->buf_len;

	/*
	 * First time the inline_buf does not suffice
	 */
	if (p->buf == p->inline_buf) {
		tmp_buf = kmalloc(len, GFP_NOFS);
		if (tmp_buf)
			memcpy(tmp_buf, p->buf, old_buf_len);
	} else {
		tmp_buf = krealloc(p->buf, len, GFP_NOFS);
	}
	if (!tmp_buf)
		return -ENOMEM;
	p->buf = tmp_buf;
	/*
	 * The real size of the buffer is bigger, this will let the fast path
	 * happen most of the time
	 */
	p->buf_len = ksize(p->buf);
	if (p->reversed) {
		tmp_buf = p->buf + old_buf_len - path_len - 1;
		p->end = p->buf + p->buf_len - 1;
		p->start = p->end - path_len;
		memmove(p->start, tmp_buf, path_len + 1);
	} else {
		p->start = p->buf;
		p->end = p->start + path_len;
	}
	return 0;
}

static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
				   char **prepared)
{
	int ret;
	int new_len;

	new_len = p->end - p->start + name_len;
	if (p->start != p->end)
		new_len++;
	ret = fs_path_ensure_buf(p, new_len);
	if (ret < 0)
		goto out;

	if (p->reversed) {
		if (p->start != p->end)
			*--p->start = '/';
		p->start -= name_len;
		*prepared = p->start;
	} else {
		if (p->start != p->end)
			*p->end++ = '/';
		*prepared = p->end;
		p->end += name_len;
		*p->end = 0;
	}

out:
	return ret;
}

static int fs_path_add(struct fs_path *p, const char *name, int name_len)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, name_len, &prepared);
	if (ret < 0)
		goto out;
	memcpy(prepared, name, name_len);

out:
	return ret;
}

static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
	if (ret < 0)
		goto out;
	memcpy(prepared, p2->start, p2->end - p2->start);

out:
	return ret;
}

static int fs_path_add_from_extent_buffer(struct fs_path *p,
					  struct extent_buffer *eb,
					  unsigned long off, int len)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, len, &prepared);
	if (ret < 0)
		goto out;

	read_extent_buffer(eb, prepared, off, len);

out:
	return ret;
}

static int fs_path_copy(struct fs_path *p, struct fs_path *from)
{
	int ret;

	p->reversed = from->reversed;
	fs_path_reset(p);

	ret = fs_path_add_path(p, from);

	return ret;
}


static void fs_path_unreverse(struct fs_path *p)
{
	char *tmp;
	int len;

	if (!p->reversed)
		return;

	tmp = p->start;
	len = p->end - p->start;
	p->start = p->buf;
	p->end = p->start + len;
	memmove(p->start, tmp, len + 1);
	p->reversed = 0;
}
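
/*
 * Usage sketch of the fs_path helpers above, for illustration only (this
 * function is not part of the original flow and is never called). A normal
 * path grows to the right with fs_path_add(); a reversed path (see
 * fs_path_alloc_reversed()) grows to the left and is turned into a normal
 * path with fs_path_unreverse(), which is how get_cur_path() further below
 * builds a path while walking from an inode up to the subvolume root.
 */
static int __maybe_unused fs_path_usage_sketch(void)
{
	struct fs_path *p;
	int ret;

	p = fs_path_alloc();
	if (!p)
		return -ENOMEM;
	ret = fs_path_add(p, "dir", 3);		 /* p->start == "dir" */
	if (!ret)
		ret = fs_path_add(p, "file", 4); /* p->start == "dir/file" */
	fs_path_free(p);
	return ret;
}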
static struct btrfs_path *alloc_path_for_send(void)
{
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return NULL;
	path->search_commit_root = 1;
	path->skip_locking = 1;
	path->need_commit_sem = 1;
	return path;
}

static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
{
	int ret;
	mm_segment_t old_fs;
	u32 pos = 0;

	old_fs = get_fs();
	set_fs(KERNEL_DS);

	while (pos < len) {
		ret = vfs_write(filp, (__force const char __user *)buf + pos,
				len - pos, off);
		/* TODO handle that correctly */
		/*if (ret == -ERESTARTSYS) {
			continue;
		}*/
		if (ret < 0)
			goto out;
		if (ret == 0) {
			ret = -EIO;
			goto out;
		}
		pos += ret;
	}

	ret = 0;

out:
	set_fs(old_fs);
	return ret;
}

static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
{
	struct btrfs_tlv_header *hdr;
	int total_len = sizeof(*hdr) + len;
	int left = sctx->send_max_size - sctx->send_size;

	if (unlikely(left < total_len))
		return -EOVERFLOW;

	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
	hdr->tlv_type = cpu_to_le16(attr);
	hdr->tlv_len = cpu_to_le16(len);
	memcpy(hdr + 1, data, len);
	sctx->send_size += total_len;

	return 0;
}

#define TLV_PUT_DEFINE_INT(bits) \
	static int tlv_put_u##bits(struct send_ctx *sctx,		\
				   u##bits attr, u##bits value)		\
	{								\
		__le##bits __tmp = cpu_to_le##bits(value);		\
		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));	\
	}

TLV_PUT_DEFINE_INT(64)

static int tlv_put_string(struct send_ctx *sctx, u16 attr,
			  const char *str, int len)
{
	if (len == -1)
		len = strlen(str);
	return tlv_put(sctx, attr, str, len);
}

static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
			const u8 *uuid)
{
	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
}

static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
				  struct extent_buffer *eb,
				  struct btrfs_timespec *ts)
{
	struct btrfs_timespec bts;
	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
	return tlv_put(sctx, attr, &bts, sizeof(bts));
}


#define TLV_PUT(sctx, attrtype, attrlen, data) \
	do { \
		ret = tlv_put(sctx, attrtype, attrlen, data); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_INT(sctx, attrtype, bits, value) \
	do { \
		ret = tlv_put_u##bits(sctx, attrtype, value); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
#define TLV_PUT_STRING(sctx, attrtype, str, len) \
	do { \
		ret = tlv_put_string(sctx, attrtype, str, len); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_PATH(sctx, attrtype, p) \
	do { \
		ret = tlv_put_string(sctx, attrtype, p->start, \
				     p->end - p->start); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_UUID(sctx, attrtype, uuid) \
	do { \
		ret = tlv_put_uuid(sctx, attrtype, uuid); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
	do { \
		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

static int send_header(struct send_ctx *sctx)
{
	struct btrfs_stream_header hdr;

	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);

	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
					&sctx->send_off);
}
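
/*
 * Layout of the stream produced by the helpers above and below, pieced
 * together from send_header(), begin_cmd(), tlv_put() and send_cmd():
 *
 *	btrfs_stream_header	magic + version, written once by send_header()
 *	btrfs_cmd_header	le32 len of the payload (header excluded),
 *				le16 cmd, le32 crc32c of the whole command
 *				computed with the crc field set to 0
 *	TLVs			each one a btrfs_tlv_header (le16 tlv_type,
 *				le16 tlv_len) followed by tlv_len data bytes
 *	...			next btrfs_cmd_header, and so on
 */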
/*
 * For each command/item we want to send to userspace, we call this function.
 */
static int begin_cmd(struct send_ctx *sctx, int cmd)
{
	struct btrfs_cmd_header *hdr;

	if (WARN_ON(!sctx->send_buf))
		return -EINVAL;

	BUG_ON(sctx->send_size);

	sctx->send_size += sizeof(*hdr);
	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->cmd = cpu_to_le16(cmd);

	return 0;
}

static int send_cmd(struct send_ctx *sctx)
{
	int ret;
	struct btrfs_cmd_header *hdr;
	u32 crc;

	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
	hdr->crc = 0;

	crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
	hdr->crc = cpu_to_le32(crc);

	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
					&sctx->send_off);

	sctx->total_send_size += sctx->send_size;
	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
	sctx->send_size = 0;

	return ret;
}

/*
 * Sends a move instruction to user space
 */
static int send_rename(struct send_ctx *sctx,
		     struct fs_path *from, struct fs_path *to)
{
	int ret;

verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a link instruction to user space
 */
static int send_link(struct send_ctx *sctx,
		     struct fs_path *path, struct fs_path *lnk)
{
	int ret;

verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends an unlink instruction to user space
 */
static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
{
	int ret;

verbose_printk("btrfs: send_unlink %s\n", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a rmdir instruction to user space
 */
static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
{
	int ret;

verbose_printk("btrfs: send_rmdir %s\n", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}
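
/*
 * All senders above follow the same three-step pattern: begin_cmd() reserves
 * room for the command header in sctx->send_buf, the TLV_PUT* macros append
 * attributes (jumping to the local tlv_put_failure label on error, which is
 * why each sender defines that label), and send_cmd() fills in the header,
 * checksums the command and writes it out.
 */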
/*
 * Helper function to retrieve some fields from an inode item.
 */
static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
			  u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
			  u64 *gid, u64 *rdev)
{
	int ret;
	struct btrfs_inode_item *ii;
	struct btrfs_key key;

	key.objectid = ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		return ret;
	}

	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
			struct btrfs_inode_item);
	if (size)
		*size = btrfs_inode_size(path->nodes[0], ii);
	if (gen)
		*gen = btrfs_inode_generation(path->nodes[0], ii);
	if (mode)
		*mode = btrfs_inode_mode(path->nodes[0], ii);
	if (uid)
		*uid = btrfs_inode_uid(path->nodes[0], ii);
	if (gid)
		*gid = btrfs_inode_gid(path->nodes[0], ii);
	if (rdev)
		*rdev = btrfs_inode_rdev(path->nodes[0], ii);

	return ret;
}

static int get_inode_info(struct btrfs_root *root,
			  u64 ino, u64 *size, u64 *gen,
			  u64 *mode, u64 *uid, u64 *gid,
			  u64 *rdev)
{
	struct btrfs_path *path;
	int ret;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;
	ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
			       rdev);
	btrfs_free_path(path);
	return ret;
}

typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
				   struct fs_path *p,
				   void *ctx);
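
/*
 * Minimal iterate_inode_ref_t callback, for illustration only (not used by
 * the send code): count the refs it is called for. Returning 0 continues
 * the iteration, 1 stops it and a negative value aborts with an error; see
 * __copy_first_ref() further below for a real in-tree user.
 */
static int __maybe_unused count_refs_sketch(int num, u64 dir, int index,
					    struct fs_path *p, void *ctx)
{
	u64 *refs = ctx;

	(*refs)++;
	return 0;
}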
/*
 * Helper function to iterate the entries in ONE btrfs_inode_ref or
 * btrfs_inode_extref.
 * The iterate callback may return a non zero value to stop iteration. This can
 * be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the INODE_REF or INODE_EXTREF when called.
 */
static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
			     struct btrfs_key *found_key, int resolve,
			     iterate_inode_ref_t iterate, void *ctx)
{
	struct extent_buffer *eb = path->nodes[0];
	struct btrfs_item *item;
	struct btrfs_inode_ref *iref;
	struct btrfs_inode_extref *extref;
	struct btrfs_path *tmp_path;
	struct fs_path *p;
	u32 cur = 0;
	u32 total;
	int slot = path->slots[0];
	u32 name_len;
	char *start;
	int ret = 0;
	int num = 0;
	int index;
	u64 dir;
	unsigned long name_off;
	unsigned long elem_size;
	unsigned long ptr;

	p = fs_path_alloc_reversed();
	if (!p)
		return -ENOMEM;

	tmp_path = alloc_path_for_send();
	if (!tmp_path) {
		fs_path_free(p);
		return -ENOMEM;
	}


	if (found_key->type == BTRFS_INODE_REF_KEY) {
		ptr = (unsigned long)btrfs_item_ptr(eb, slot,
						    struct btrfs_inode_ref);
		item = btrfs_item_nr(slot);
		total = btrfs_item_size(eb, item);
		elem_size = sizeof(*iref);
	} else {
		ptr = btrfs_item_ptr_offset(eb, slot);
		total = btrfs_item_size_nr(eb, slot);
		elem_size = sizeof(*extref);
	}

	while (cur < total) {
		fs_path_reset(p);

		if (found_key->type == BTRFS_INODE_REF_KEY) {
			iref = (struct btrfs_inode_ref *)(ptr + cur);
			name_len = btrfs_inode_ref_name_len(eb, iref);
			name_off = (unsigned long)(iref + 1);
			index = btrfs_inode_ref_index(eb, iref);
			dir = found_key->offset;
		} else {
			extref = (struct btrfs_inode_extref *)(ptr + cur);
			name_len = btrfs_inode_extref_name_len(eb, extref);
			name_off = (unsigned long)&extref->name;
			index = btrfs_inode_extref_index(eb, extref);
			dir = btrfs_inode_extref_parent(eb, extref);
		}

		if (resolve) {
			start = btrfs_ref_to_path(root, tmp_path, name_len,
						  name_off, eb, dir,
						  p->buf, p->buf_len);
			if (IS_ERR(start)) {
				ret = PTR_ERR(start);
				goto out;
			}
			if (start < p->buf) {
				/* overflow, try again with a larger buffer */
				ret = fs_path_ensure_buf(p,
						p->buf_len + p->buf - start);
				if (ret < 0)
					goto out;
				start = btrfs_ref_to_path(root, tmp_path,
							  name_len, name_off,
							  eb, dir,
							  p->buf, p->buf_len);
				if (IS_ERR(start)) {
					ret = PTR_ERR(start);
					goto out;
				}
				BUG_ON(start < p->buf);
			}
			p->start = start;
		} else {
			ret = fs_path_add_from_extent_buffer(p, eb, name_off,
							     name_len);
			if (ret < 0)
				goto out;
		}

		cur += elem_size + name_len;
		ret = iterate(num, dir, index, p, ctx);
		if (ret)
			goto out;
		num++;
	}

out:
	btrfs_free_path(tmp_path);
	fs_path_free(p);
	return ret;
}

typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
				  const char *name, int name_len,
				  const char *data, int data_len,
				  u8 type, void *ctx);

/*
 * Helper function to iterate the entries in ONE btrfs_dir_item.
 * The iterate callback may return a non zero value to stop iteration. This can
 * be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the dir item when called.
 */
static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
			    struct btrfs_key *found_key,
			    iterate_dir_item_t iterate, void *ctx)
{
	int ret = 0;
	struct extent_buffer *eb;
	struct btrfs_item *item;
	struct btrfs_dir_item *di;
	struct btrfs_key di_key;
	char *buf = NULL;
	int buf_len;
	u32 name_len;
	u32 data_len;
	u32 cur;
	u32 len;
	u32 total;
	int slot;
	int num;
	u8 type;

	/*
	 * Start with a small buffer (1 page). If later we end up needing more
	 * space, which can happen for xattrs on a fs with a leaf size greater
	 * than the page size, attempt to increase the buffer. Typically xattr
	 * values are small.
	 */
	buf_len = PATH_MAX;
	buf = kmalloc(buf_len, GFP_NOFS);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	eb = path->nodes[0];
	slot = path->slots[0];
	item = btrfs_item_nr(slot);
	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	cur = 0;
	len = 0;
	total = btrfs_item_size(eb, item);

	num = 0;
	while (cur < total) {
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		type = btrfs_dir_type(eb, di);
		btrfs_dir_item_key_to_cpu(eb, di, &di_key);

		if (type == BTRFS_FT_XATTR) {
			if (name_len > XATTR_NAME_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
			if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) {
				ret = -E2BIG;
				goto out;
			}
		} else {
			/*
			 * Path too long
			 */
			if (name_len + data_len > PATH_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
		}

		if (name_len + data_len > buf_len) {
			buf_len = name_len + data_len;
			if (is_vmalloc_addr(buf)) {
				vfree(buf);
				buf = NULL;
			} else {
				char *tmp = krealloc(buf, buf_len,
						GFP_NOFS | __GFP_NOWARN);

				if (!tmp)
					kfree(buf);
				buf = tmp;
			}
			if (!buf) {
				buf = vmalloc(buf_len);
				if (!buf) {
					ret = -ENOMEM;
					goto out;
				}
			}
		}

		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
				name_len + data_len);

		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;

		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
				data_len, type, ctx);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}

		num++;
	}

out:
	kvfree(buf);
	return ret;
}

static int __copy_first_ref(int num, u64 dir, int index,
			    struct fs_path *p, void *ctx)
{
	int ret;
	struct fs_path *pt = ctx;

	ret = fs_path_copy(pt, p);
	if (ret < 0)
		return ret;

	/* we want the first only */
	return 1;
}

/*
 * Retrieve the first path of an inode. If an inode has more than one
 * ref/hardlink, this is ignored.
 */
static int get_inode_path(struct btrfs_root *root,
			  u64 ino, struct fs_path *path)
{
	int ret;
	struct btrfs_key key, found_key;
	struct btrfs_path *p;

	p = alloc_path_for_send();
	if (!p)
		return -ENOMEM;

	fs_path_reset(path);

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		ret = 1;
		goto out;
	}
	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
	if (found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	ret = iterate_inode_ref(root, p, &found_key, 1,
				__copy_first_ref, path);
	if (ret < 0)
		goto out;
	ret = 0;

out:
	btrfs_free_path(p);
	return ret;
}

struct backref_ctx {
	struct send_ctx *sctx;

	struct btrfs_path *path;
	/* number of total found references */
	u64 found;

	/*
	 * used for clones found in send_root. clones found behind cur_objectid
	 * and cur_offset are not considered as allowed clones.
	 */
	u64 cur_objectid;
	u64 cur_offset;

	/* may be truncated in case it's the last extent in a file */
	u64 extent_len;

	/* data offset in the file extent item */
	u64 data_offset;

	/* Just to check for bugs in backref resolving */
	int found_itself;
};

static int __clone_root_cmp_bsearch(const void *key, const void *elt)
{
	u64 root = (u64)(uintptr_t)key;
	struct clone_root *cr = (struct clone_root *)elt;

	if (root < cr->root->objectid)
		return -1;
	if (root > cr->root->objectid)
		return 1;
	return 0;
}

static int __clone_root_cmp_sort(const void *e1, const void *e2)
{
	struct clone_root *cr1 = (struct clone_root *)e1;
	struct clone_root *cr2 = (struct clone_root *)e2;

	if (cr1->root->objectid < cr2->root->objectid)
		return -1;
	if (cr1->root->objectid > cr2->root->objectid)
		return 1;
	return 0;
}
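
/*
 * Note: the bsearch() in __iterate_backrefs() below relies on
 * sctx->clone_roots being sorted by root objectid. The array is sorted once
 * with __clone_root_cmp_sort() when the send ioctl sets up the clone roots.
 */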
/*
 * Called for every backref that is found for the current extent.
 * Results are collected in sctx->clone_roots->ino/offset/found_refs
 */
static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
{
	struct backref_ctx *bctx = ctx_;
	struct clone_root *found;
	int ret;
	u64 i_size;

	/* First check if the root is in the list of accepted clone sources */
	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
			bctx->sctx->clone_roots_cnt,
			sizeof(struct clone_root),
			__clone_root_cmp_bsearch);
	if (!found)
		return 0;

	if (found->root == bctx->sctx->send_root &&
	    ino == bctx->cur_objectid &&
	    offset == bctx->cur_offset) {
		bctx->found_itself = 1;
	}

	/*
	 * There are inodes that have extents that lie behind their i_size.
	 * Don't accept clones from these extents.
	 */
	ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
			       NULL, NULL, NULL);
	btrfs_release_path(bctx->path);
	if (ret < 0)
		return ret;

	if (offset + bctx->data_offset + bctx->extent_len > i_size)
		return 0;

	/*
	 * Make sure we don't consider clones from send_root that are
	 * behind the current inode/offset.
	 */
	if (found->root == bctx->sctx->send_root) {
		/*
		 * TODO for the moment we don't accept clones from the inode
		 * that is currently being sent. We may change this when
		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
		 * file.
		 */
		if (ino >= bctx->cur_objectid)
			return 0;
#if 0
		if (ino > bctx->cur_objectid)
			return 0;
		if (offset + bctx->extent_len > bctx->cur_offset)
			return 0;
#endif
	}

	bctx->found++;
	found->found_refs++;
	if (ino < found->ino) {
		found->ino = ino;
		found->offset = offset;
	} else if (found->ino == ino) {
		/*
		 * same extent found more than once in the same file.
		 */
		if (found->offset > offset + bctx->extent_len)
			found->offset = offset;
	}

	return 0;
}

/*
 * Given an inode, offset and extent item, it finds a good clone for a clone
 * instruction. Returns -ENOENT when none could be found. The function makes
 * sure that the returned clone is usable at the point where sending is at the
 * moment. This means that no clones are accepted which lie behind the current
 * inode+offset.
 *
 * path must point to the extent item when called.
 */
static int find_extent_clone(struct send_ctx *sctx,
			     struct btrfs_path *path,
			     u64 ino, u64 data_offset,
			     u64 ino_size,
			     struct clone_root **found)
{
	int ret;
	int extent_type;
	u64 logical;
	u64 disk_byte;
	u64 num_bytes;
	u64 extent_item_pos;
	u64 flags = 0;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *eb = path->nodes[0];
	struct backref_ctx *backref_ctx = NULL;
	struct clone_root *cur_clone_root;
	struct btrfs_key found_key;
	struct btrfs_path *tmp_path;
	int compressed;
	u32 i;

	tmp_path = alloc_path_for_send();
	if (!tmp_path)
		return -ENOMEM;

	/* We only use this path under the commit sem */
	tmp_path->need_commit_sem = 0;

	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
	if (!backref_ctx) {
		ret = -ENOMEM;
		goto out;
	}

	backref_ctx->path = tmp_path;

	if (data_offset >= ino_size) {
		/*
		 * There may be extents that lie behind the file's size.
		 * I at least had this in combination with snapshotting while
		 * writing large files.
		 */
		ret = 0;
		goto out;
	}

	fi = btrfs_item_ptr(eb, path->slots[0],
			struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);
	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		ret = -ENOENT;
		goto out;
	}
	compressed = btrfs_file_extent_compression(eb, fi);

	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
	if (disk_byte == 0) {
		ret = -ENOENT;
		goto out;
	}
	logical = disk_byte + btrfs_file_extent_offset(eb, fi);

	down_read(&sctx->send_root->fs_info->commit_root_sem);
	ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
				  &found_key, &flags);
	up_read(&sctx->send_root->fs_info->commit_root_sem);
	btrfs_release_path(tmp_path);

	if (ret < 0)
		goto out;
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ret = -EIO;
		goto out;
	}

	/*
	 * Setup the clone roots.
	 */
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		cur_clone_root = sctx->clone_roots + i;
		cur_clone_root->ino = (u64)-1;
		cur_clone_root->offset = 0;
		cur_clone_root->found_refs = 0;
	}

	backref_ctx->sctx = sctx;
	backref_ctx->found = 0;
	backref_ctx->cur_objectid = ino;
	backref_ctx->cur_offset = data_offset;
	backref_ctx->found_itself = 0;
	backref_ctx->extent_len = num_bytes;
	/*
	 * For non-compressed extents iterate_extent_inodes() gives us extent
	 * offsets that already take into account the data offset, but not for
	 * compressed extents, since the offset is logical and not relative to
	 * the physical extent locations. We must take this into account to
	 * avoid sending clone offsets that go beyond the source file's size,
	 * which would result in the clone ioctl failing with -EINVAL on the
	 * receiving end.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		backref_ctx->data_offset = 0;
	else
		backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi);

	/*
	 * The last extent of a file may be too large due to page alignment.
	 * We need to adjust extent_len in this case so that the checks in
	 * __iterate_backrefs work.
	 */
	if (data_offset + num_bytes >= ino_size)
		backref_ctx->extent_len = ino_size - data_offset;

	/*
	 * Now collect all backrefs.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		extent_item_pos = logical - found_key.objectid;
	else
		extent_item_pos = 0;
	ret = iterate_extent_inodes(sctx->send_root->fs_info,
					found_key.objectid, extent_item_pos, 1,
					__iterate_backrefs, backref_ctx);

	if (ret < 0)
		goto out;

	if (!backref_ctx->found_itself) {
		/* found a bug in backref code? */
		ret = -EIO;
		btrfs_err(sctx->send_root->fs_info, "did not find backref in "
				"send_root. inode=%llu, offset=%llu, "
				"disk_byte=%llu found extent=%llu",
				ino, data_offset, disk_byte, found_key.objectid);
		goto out;
	}

verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
		"ino=%llu, "
		"num_bytes=%llu, logical=%llu\n",
		data_offset, ino, num_bytes, logical);

	if (!backref_ctx->found)
		verbose_printk("btrfs: no clones found\n");

	cur_clone_root = NULL;
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		if (sctx->clone_roots[i].found_refs) {
			if (!cur_clone_root)
				cur_clone_root = sctx->clone_roots + i;
			else if (sctx->clone_roots[i].root == sctx->send_root)
				/* prefer clones from send_root over others */
				cur_clone_root = sctx->clone_roots + i;
		}

	}

	if (cur_clone_root) {
		*found = cur_clone_root;
		ret = 0;
	} else {
		ret = -ENOENT;
	}

out:
	btrfs_free_path(tmp_path);
	kfree(backref_ctx);
	return ret;
}

static int read_symlink(struct btrfs_root *root,
			u64 ino,
			struct fs_path *dest)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_file_extent_item *ei;
	u8 type;
	u8 compression;
	unsigned long off;
	int len;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		/*
		 * An empty symlink inode. Can happen in rare error paths when
		 * creating a symlink (transaction committed before the inode
		 * eviction handler removed the symlink inode items and a crash
		 * happened in between or the subvol was snapshotted in
		 * between).
		 * Print an informative message to dmesg/syslog so that the
		 * user can delete the symlink.
		 */
		btrfs_err(root->fs_info,
			  "Found empty symlink inode %llu at root %llu",
			  ino, root->root_key.objectid);
		ret = -EIO;
		goto out;
	}

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			struct btrfs_file_extent_item);
	type = btrfs_file_extent_type(path->nodes[0], ei);
	compression = btrfs_file_extent_compression(path->nodes[0], ei);
	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
	BUG_ON(compression);

	off = btrfs_file_extent_inline_start(ei);
	len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);

	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Helper function to generate a file name that is unique in the root of
 * send_root and parent_root. This is used to generate names for orphan inodes.
 */
static int gen_unique_name(struct send_ctx *sctx,
			   u64 ino, u64 gen,
			   struct fs_path *dest)
{
	int ret = 0;
	struct btrfs_path *path;
	struct btrfs_dir_item *di;
	char tmp[64];
	int len;
	u64 idx = 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	while (1) {
		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
				ino, gen, idx);
		ASSERT(len < sizeof(tmp));

		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
				path, BTRFS_FIRST_FREE_OBJECTID,
				tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}

		if (!sctx->parent_root) {
			/* unique */
			ret = 0;
			break;
		}

		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
				path, BTRFS_FIRST_FREE_OBJECTID,
				tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}
		/* unique */
		break;
	}

	ret = fs_path_add(dest, tmp, strlen(tmp));

out:
	btrfs_free_path(path);
	return ret;
}

enum inode_state {
	inode_state_no_change,
	inode_state_will_create,
	inode_state_did_create,
	inode_state_will_delete,
	inode_state_did_delete,
};

static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;
	int left_ret;
	int right_ret;
	u64 left_gen;
	u64 right_gen;

	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
			NULL, NULL);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	left_ret = ret;

	if (!sctx->parent_root) {
		right_ret = -ENOENT;
	} else {
		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
				NULL, NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		right_ret = ret;
	}

	if (!left_ret && !right_ret) {
		if (left_gen == gen && right_gen == gen) {
			ret = inode_state_no_change;
		} else if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else if (!left_ret) {
		if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else {
			ret = -ENOENT;
		}
	} else if (!right_ret) {
		if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else {
		ret = -ENOENT;
	}

out:
	return ret;
}
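
/*
 * Summary of get_cur_inode_state(), where "left" means the inode exists in
 * send_root with a matching generation and "right" means the same for
 * parent_root:
 *
 *	left	right	ino < send_progress	result
 *	yes	yes	-			inode_state_no_change
 *	yes	no	yes			inode_state_did_create
 *	yes	no	no			inode_state_will_create
 *	no	yes	yes			inode_state_did_delete
 *	no	yes	no			inode_state_will_delete
 *	no	no	-			-ENOENT
 */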
static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;

	ret = get_cur_inode_state(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (ret == inode_state_no_change ||
	    ret == inode_state_did_create ||
	    ret == inode_state_will_delete)
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}

/*
 * Helper function to lookup a dir item in a dir.
 */
static int lookup_dir_item_inode(struct btrfs_root *root,
				 u64 dir, const char *name, int name_len,
				 u64 *found_inode,
				 u8 *found_type)
{
	int ret = 0;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(NULL, root, path,
			dir, name, name_len, 0);
	if (!di) {
		ret = -ENOENT;
		goto out;
	}
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		goto out;
	}
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
	if (key.type == BTRFS_ROOT_ITEM_KEY) {
		ret = -ENOENT;
		goto out;
	}
	*found_inode = key.objectid;
	*found_type = btrfs_dir_type(path->nodes[0], di);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent
 * dir, the generation of the parent dir and the name of the dir entry.
 */
static int get_first_ref(struct btrfs_root *root, u64 ino,
			 u64 *dir, u64 *dir_gen, struct fs_path *name)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	int len;
	u64 parent_dir;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
	if (ret < 0)
		goto out;
	if (!ret)
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				path->slots[0]);
	if (ret || found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	if (found_key.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *iref;
		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_inode_ref);
		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
						     (unsigned long)(iref + 1),
						     len);
		parent_dir = found_key.offset;
	} else {
		struct btrfs_inode_extref *extref;
		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					struct btrfs_inode_extref);
		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
					(unsigned long)&extref->name, len);
		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
	}
	if (ret < 0)
		goto out;
	btrfs_release_path(path);

	if (dir_gen) {
		ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0)
			goto out;
	}

	*dir = parent_dir;

out:
	btrfs_free_path(path);
	return ret;
}

static int is_first_ref(struct btrfs_root *root,
			u64 ino, u64 dir,
			const char *name, int name_len)
{
	int ret;
	struct fs_path *tmp_name;
	u64 tmp_dir;

	tmp_name = fs_path_alloc();
	if (!tmp_name)
		return -ENOMEM;

	ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
	if (ret < 0)
		goto out;

	if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
		ret = 0;
		goto out;
	}

	ret = !memcmp(tmp_name->start, name, name_len);

out:
	fs_path_free(tmp_name);
	return ret;
}

/*
 * Used by process_recorded_refs to determine if a new ref would overwrite an
 * already existing ref. In case it detects an overwrite, it returns the
 * inode/gen in who_ino/who_gen.
 * When an overwrite is detected, process_recorded_refs does proper orphanizing
 * to make sure later references to the overwritten inode are possible.
 * Orphanizing is however only required for the first ref of an inode.
 * process_recorded_refs does an additional is_first_ref check to see if
 * orphanizing is really required.
 */
static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
			      const char *name, int name_len,
			      u64 *who_ino, u64 *who_gen)
{
	int ret = 0;
	u64 gen;
	u64 other_inode = 0;
	u8 other_type = 0;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	/*
	 * If we have a parent root we need to verify that the parent dir was
	 * not deleted and then re-created, if it was then we have no overwrite
	 * and we can just unlink this entry.
	 */
	if (sctx->parent_root) {
		ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}
		if (gen != dir_gen)
			goto out;
	}

	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
			&other_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Check if the overwritten ref was already processed. If yes, the ref
	 * was already unlinked/moved, so we can safely assume that we will not
	 * overwrite anything at this point in time.
	 */
	if (other_inode > sctx->send_progress) {
		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
				who_gen, NULL, NULL, NULL, NULL);
		if (ret < 0)
			goto out;

		ret = 1;
		*who_ino = other_inode;
	} else {
		ret = 0;
	}

out:
	return ret;
}

/*
 * Checks if the ref was overwritten by an already processed inode. This is
 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
 * thus the orphan name needs to be used.
 * process_recorded_refs also uses it to avoid unlinking of refs that were
 * overwritten.
 */
static int did_overwrite_ref(struct send_ctx *sctx,
			    u64 dir, u64 dir_gen,
			    u64 ino, u64 ino_gen,
			    const char *name, int name_len)
{
	int ret = 0;
	u64 gen;
	u64 ow_inode;
	u8 other_type;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	/* check if the ref was overwritten by another ref */
	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
			&ow_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		/* was never and will never be overwritten */
		ret = 0;
		goto out;
	}

	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
			NULL, NULL);
	if (ret < 0)
		goto out;

	if (ow_inode == ino && gen == ino_gen) {
		ret = 0;
		goto out;
	}

	/*
	 * We know that it is or will be overwritten. Check this now.
	 * The current inode being processed might have been the one that caused
	 * inode 'ino' to be orphanized, therefore check if ow_inode matches
	 * the current inode being processed.
	 */
	if ((ow_inode < sctx->send_progress) ||
	    (ino != sctx->cur_ino && ow_inode == sctx->cur_ino &&
	     gen == sctx->cur_inode_gen))
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}

/*
 * Same as did_overwrite_ref, but also checks if it is the first ref of an
 * inode that got overwritten. This is used by process_recorded_refs to
 * determine if it has to use the path as returned by get_cur_path or the
 * orphan name.
 */
static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret = 0;
	struct fs_path *name = NULL;
	u64 dir;
	u64 dir_gen;

	if (!sctx->parent_root)
		goto out;

	name = fs_path_alloc();
	if (!name)
		return -ENOMEM;

	ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
	if (ret < 0)
		goto out;

	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
			name->start, fs_path_len(name));

out:
	fs_path_free(name);
	return ret;
}

/*
 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
 * so we need to do some special handling in case we have clashes. This
 * function takes care of this with the help of name_cache_entry::radix_list.
 * In case of error, nce is kfreed.
 */
static int name_cache_insert(struct send_ctx *sctx,
			     struct name_cache_entry *nce)
{
	int ret = 0;
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
				     (unsigned long)nce->ino);
	if (!nce_head) {
		nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
		if (!nce_head) {
			kfree(nce);
			return -ENOMEM;
		}
		INIT_LIST_HEAD(nce_head);

		ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
		if (ret < 0) {
			kfree(nce_head);
			kfree(nce);
			return ret;
		}
	}
	list_add_tail(&nce->radix_list, nce_head);
	list_add_tail(&nce->list, &sctx->name_cache_list);
	sctx->name_cache_size++;

	return ret;
}

static void name_cache_delete(struct send_ctx *sctx,
			      struct name_cache_entry *nce)
{
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
				     (unsigned long)nce->ino);
	if (!nce_head) {
		btrfs_err(sctx->send_root->fs_info,
	      "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
			nce->ino, sctx->name_cache_size);
	}

	list_del(&nce->radix_list);
	list_del(&nce->list);
	sctx->name_cache_size--;

	/*
	 * We may not get to the final release of nce_head if the lookup fails
	 */
	if (nce_head && list_empty(nce_head)) {
		radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
		kfree(nce_head);
	}
}

static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
						  u64 ino, u64 gen)
{
	struct list_head *nce_head;
	struct name_cache_entry *cur;

	nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
	if (!nce_head)
		return NULL;

	list_for_each_entry(cur, nce_head, radix_list) {
		if (cur->ino == ino && cur->gen == gen)
			return cur;
	}
	return NULL;
}

/*
 * Removes the entry from the list and adds it back to the end. This marks the
 * entry as recently used so that name_cache_clean_unused does not remove it.
 */
static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
{
	list_del(&nce->list);
	list_add_tail(&nce->list, &sctx->name_cache_list);
}
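
/*
 * Typical life cycle of a cache entry, for illustration (see
 * __get_cur_name_and_parent() below for the real user):
 *
 *	nce = name_cache_search(sctx, ino, gen);
 *	if (nce)
 *		name_cache_used(sctx, nce);	// move to the MRU end
 *	else
 *		name_cache_insert(sctx, nce);	// new entry; clashes on the
 *						// lower 32 bits of ino go to
 *						// the per-slot radix_list
 *	name_cache_clean_unused(sctx);		// trim back to the max size
 */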
/*
 * Remove some entries from the beginning of name_cache_list.
 */
static void name_cache_clean_unused(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
		return;

	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
		nce = list_entry(sctx->name_cache_list.next,
				struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}

static void name_cache_free(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	while (!list_empty(&sctx->name_cache_list)) {
		nce = list_entry(sctx->name_cache_list.next,
				struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}

/*
 * Used by get_cur_path for each ref up to the root.
 * Returns 0 if it succeeded.
 * Returns 1 if the inode is not existent or got overwritten. In that case, the
 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
 * Returns <0 in case of error.
 */
static int __get_cur_name_and_parent(struct send_ctx *sctx,
				     u64 ino, u64 gen,
				     u64 *parent_ino,
				     u64 *parent_gen,
				     struct fs_path *dest)
{
	int ret;
	int nce_ret;
	struct name_cache_entry *nce = NULL;

	/*
	 * First check if we already did a call to this function with the same
	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
	 * return the cached result.
	 */
	nce = name_cache_search(sctx, ino, gen);
	if (nce) {
		if (ino < sctx->send_progress && nce->need_later_update) {
			name_cache_delete(sctx, nce);
			kfree(nce);
			nce = NULL;
		} else {
			name_cache_used(sctx, nce);
			*parent_ino = nce->parent_ino;
			*parent_gen = nce->parent_gen;
			ret = fs_path_add(dest, nce->name, nce->name_len);
			if (ret < 0)
				goto out;
			ret = nce->ret;
			goto out;
		}
	}

	/*
	 * If the inode is not existent yet, add the orphan name and return 1.
	 * This should only happen for the parent dir that we determine in
	 * __record_new_ref
	 */
	ret = is_inode_existent(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (!ret) {
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
		goto out_cache;
	}

	/*
	 * Depending on whether the inode was already processed or not, use
	 * send_root or parent_root for ref lookup.
	 */
	if (ino < sctx->send_progress)
		ret = get_first_ref(sctx->send_root, ino,
				    parent_ino, parent_gen, dest);
	else
		ret = get_first_ref(sctx->parent_root, ino,
				    parent_ino, parent_gen, dest);
	if (ret < 0)
		goto out;

	/*
	 * Check if the ref was overwritten by an inode's ref that was processed
	 * earlier. If yes, treat as orphan and return 1.
	 */
	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
			dest->start, dest->end - dest->start);
	if (ret < 0)
		goto out;
	if (ret) {
		fs_path_reset(dest);
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
	}

out_cache:
	/*
	 * Store the result of the lookup in the name cache.
2181 */
2182	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
2183	if (!nce) {
2184		ret = -ENOMEM;
2185		goto out;
2186	}
2187
2188	nce->ino = ino;
2189	nce->gen = gen;
2190	nce->parent_ino = *parent_ino;
2191	nce->parent_gen = *parent_gen;
2192	nce->name_len = fs_path_len(dest);
2193	nce->ret = ret;
2194	strcpy(nce->name, dest->start);
2195
2196	if (ino < sctx->send_progress)
2197		nce->need_later_update = 0;
2198	else
2199		nce->need_later_update = 1;
2200
2201	nce_ret = name_cache_insert(sctx, nce);
2202	if (nce_ret < 0)
2203		ret = nce_ret;
2204	name_cache_clean_unused(sctx);
2205
2206out:
2207	return ret;
2208}
2209
2210/*
2211 * Magic happens here. This function returns the first ref to an inode as it
2212 * would look while receiving the stream at this point in time.
2213 * We walk the path up to the root. For every inode in between, we check if it
2214 * was already processed/sent. If yes, we continue with the parent as found
2215 * in send_root. If not, we continue with the parent as found in parent_root.
2216 * If we encounter an inode that was deleted at this point in time, we use the
2217 * inode's "orphan" name instead of the real name and stop. Same with new inodes
2218 * that were not created yet and overwritten inodes/refs.
2219 *
2220 * When do we have orphan inodes:
2221 * 1. When an inode is freshly created and thus no valid refs are available yet
2222 * 2. When a directory lost all its refs (deleted) but still has dir items
2223 *    inside which were not processed yet (pending for move/delete). If anyone
2224 *    tried to get the path to the dir items, it would get a path inside that
2225 *    orphan directory.
2226 * 3. When an inode is moved around or gets new links, it may overwrite the ref
2227 *    of an unprocessed inode. If in that case the first ref would be
2228 *    overwritten, the overwritten inode gets "orphanized". Later when we
2229 *    process this overwritten inode, it is restored at a new place by moving
2230 *    the orphan inode.
2231 *
2232 * sctx->send_progress tells this function at which point in time receiving
2233 * would be.
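 *
 * Worked example (hypothetical inode numbers): resolving the path of
 * ino 261 while send_progress is 260 looks up the first ref of 261 in
 * parent_root (261 was not processed yet), then the first ref of its
 * parent dir 259 in send_root (259 was already processed), and so on up
 * to the subvolume root, concatenating the names right to left.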
2234 */ 2235static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, 2236 struct fs_path *dest) 2237{ 2238 int ret = 0; 2239 struct fs_path *name = NULL; 2240 u64 parent_inode = 0; 2241 u64 parent_gen = 0; 2242 int stop = 0; 2243 2244 name = fs_path_alloc(); 2245 if (!name) { 2246 ret = -ENOMEM; 2247 goto out; 2248 } 2249 2250 dest->reversed = 1; 2251 fs_path_reset(dest); 2252 2253 while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { 2254 struct waiting_dir_move *wdm; 2255 2256 fs_path_reset(name); 2257 2258 if (is_waiting_for_rm(sctx, ino)) { 2259 ret = gen_unique_name(sctx, ino, gen, name); 2260 if (ret < 0) 2261 goto out; 2262 ret = fs_path_add_path(dest, name); 2263 break; 2264 } 2265 2266 wdm = get_waiting_dir_move(sctx, ino); 2267 if (wdm && wdm->orphanized) { 2268 ret = gen_unique_name(sctx, ino, gen, name); 2269 stop = 1; 2270 } else if (wdm) { 2271 ret = get_first_ref(sctx->parent_root, ino, 2272 &parent_inode, &parent_gen, name); 2273 } else { 2274 ret = __get_cur_name_and_parent(sctx, ino, gen, 2275 &parent_inode, 2276 &parent_gen, name); 2277 if (ret) 2278 stop = 1; 2279 } 2280 2281 if (ret < 0) 2282 goto out; 2283 2284 ret = fs_path_add_path(dest, name); 2285 if (ret < 0) 2286 goto out; 2287 2288 ino = parent_inode; 2289 gen = parent_gen; 2290 } 2291 2292out: 2293 fs_path_free(name); 2294 if (!ret) 2295 fs_path_unreverse(dest); 2296 return ret; 2297} 2298 2299/* 2300 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace 2301 */ 2302static int send_subvol_begin(struct send_ctx *sctx) 2303{ 2304 int ret; 2305 struct btrfs_root *send_root = sctx->send_root; 2306 struct btrfs_root *parent_root = sctx->parent_root; 2307 struct btrfs_path *path; 2308 struct btrfs_key key; 2309 struct btrfs_root_ref *ref; 2310 struct extent_buffer *leaf; 2311 char *name = NULL; 2312 int namelen; 2313 2314 path = btrfs_alloc_path(); 2315 if (!path) 2316 return -ENOMEM; 2317 2318 name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS); 2319 if (!name) { 2320 btrfs_free_path(path); 2321 return -ENOMEM; 2322 } 2323 2324 key.objectid = send_root->objectid; 2325 key.type = BTRFS_ROOT_BACKREF_KEY; 2326 key.offset = 0; 2327 2328 ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root, 2329 &key, path, 1, 0); 2330 if (ret < 0) 2331 goto out; 2332 if (ret) { 2333 ret = -ENOENT; 2334 goto out; 2335 } 2336 2337 leaf = path->nodes[0]; 2338 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2339 if (key.type != BTRFS_ROOT_BACKREF_KEY || 2340 key.objectid != send_root->objectid) { 2341 ret = -ENOENT; 2342 goto out; 2343 } 2344 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); 2345 namelen = btrfs_root_ref_name_len(leaf, ref); 2346 read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); 2347 btrfs_release_path(path); 2348 2349 if (parent_root) { 2350 ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); 2351 if (ret < 0) 2352 goto out; 2353 } else { 2354 ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL); 2355 if (ret < 0) 2356 goto out; 2357 } 2358 2359 TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); 2360 2361 if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid)) 2362 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, 2363 sctx->send_root->root_item.received_uuid); 2364 else 2365 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, 2366 sctx->send_root->root_item.uuid); 2367 2368 TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, 2369 le64_to_cpu(sctx->send_root->root_item.ctransid)); 2370 if (parent_root) { 2371 if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid)) 2372 TLV_PUT_UUID(sctx, 
BTRFS_SEND_A_CLONE_UUID, 2373 parent_root->root_item.received_uuid); 2374 else 2375 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 2376 parent_root->root_item.uuid); 2377 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 2378 le64_to_cpu(sctx->parent_root->root_item.ctransid)); 2379 } 2380 2381 ret = send_cmd(sctx); 2382 2383tlv_put_failure: 2384out: 2385 btrfs_free_path(path); 2386 kfree(name); 2387 return ret; 2388} 2389 2390static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) 2391{ 2392 int ret = 0; 2393 struct fs_path *p; 2394 2395verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); 2396 2397 p = fs_path_alloc(); 2398 if (!p) 2399 return -ENOMEM; 2400 2401 ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE); 2402 if (ret < 0) 2403 goto out; 2404 2405 ret = get_cur_path(sctx, ino, gen, p); 2406 if (ret < 0) 2407 goto out; 2408 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2409 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size); 2410 2411 ret = send_cmd(sctx); 2412 2413tlv_put_failure: 2414out: 2415 fs_path_free(p); 2416 return ret; 2417} 2418 2419static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) 2420{ 2421 int ret = 0; 2422 struct fs_path *p; 2423 2424verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); 2425 2426 p = fs_path_alloc(); 2427 if (!p) 2428 return -ENOMEM; 2429 2430 ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD); 2431 if (ret < 0) 2432 goto out; 2433 2434 ret = get_cur_path(sctx, ino, gen, p); 2435 if (ret < 0) 2436 goto out; 2437 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2438 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777); 2439 2440 ret = send_cmd(sctx); 2441 2442tlv_put_failure: 2443out: 2444 fs_path_free(p); 2445 return ret; 2446} 2447 2448static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) 2449{ 2450 int ret = 0; 2451 struct fs_path *p; 2452 2453verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); 2454 2455 p = fs_path_alloc(); 2456 if (!p) 2457 return -ENOMEM; 2458 2459 ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN); 2460 if (ret < 0) 2461 goto out; 2462 2463 ret = get_cur_path(sctx, ino, gen, p); 2464 if (ret < 0) 2465 goto out; 2466 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2467 TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid); 2468 TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid); 2469 2470 ret = send_cmd(sctx); 2471 2472tlv_put_failure: 2473out: 2474 fs_path_free(p); 2475 return ret; 2476} 2477 2478static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) 2479{ 2480 int ret = 0; 2481 struct fs_path *p = NULL; 2482 struct btrfs_inode_item *ii; 2483 struct btrfs_path *path = NULL; 2484 struct extent_buffer *eb; 2485 struct btrfs_key key; 2486 int slot; 2487 2488verbose_printk("btrfs: send_utimes %llu\n", ino); 2489 2490 p = fs_path_alloc(); 2491 if (!p) 2492 return -ENOMEM; 2493 2494 path = alloc_path_for_send(); 2495 if (!path) { 2496 ret = -ENOMEM; 2497 goto out; 2498 } 2499 2500 key.objectid = ino; 2501 key.type = BTRFS_INODE_ITEM_KEY; 2502 key.offset = 0; 2503 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2504 if (ret < 0) 2505 goto out; 2506 2507 eb = path->nodes[0]; 2508 slot = path->slots[0]; 2509 ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 2510 2511 ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES); 2512 if (ret < 0) 2513 goto out; 2514 2515 ret = get_cur_path(sctx, ino, gen, p); 2516 if (ret < 0) 2517 goto out; 2518 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2519 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime); 2520 
TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime); 2521 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime); 2522 /* TODO Add otime support when the otime patches get into upstream */ 2523 2524 ret = send_cmd(sctx); 2525 2526tlv_put_failure: 2527out: 2528 fs_path_free(p); 2529 btrfs_free_path(path); 2530 return ret; 2531} 2532 2533/* 2534 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have 2535 * a valid path yet because we did not process the refs yet. So, the inode 2536 * is created as orphan. 2537 */ 2538static int send_create_inode(struct send_ctx *sctx, u64 ino) 2539{ 2540 int ret = 0; 2541 struct fs_path *p; 2542 int cmd; 2543 u64 gen; 2544 u64 mode; 2545 u64 rdev; 2546 2547verbose_printk("btrfs: send_create_inode %llu\n", ino); 2548 2549 p = fs_path_alloc(); 2550 if (!p) 2551 return -ENOMEM; 2552 2553 if (ino != sctx->cur_ino) { 2554 ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, 2555 NULL, NULL, &rdev); 2556 if (ret < 0) 2557 goto out; 2558 } else { 2559 gen = sctx->cur_inode_gen; 2560 mode = sctx->cur_inode_mode; 2561 rdev = sctx->cur_inode_rdev; 2562 } 2563 2564 if (S_ISREG(mode)) { 2565 cmd = BTRFS_SEND_C_MKFILE; 2566 } else if (S_ISDIR(mode)) { 2567 cmd = BTRFS_SEND_C_MKDIR; 2568 } else if (S_ISLNK(mode)) { 2569 cmd = BTRFS_SEND_C_SYMLINK; 2570 } else if (S_ISCHR(mode) || S_ISBLK(mode)) { 2571 cmd = BTRFS_SEND_C_MKNOD; 2572 } else if (S_ISFIFO(mode)) { 2573 cmd = BTRFS_SEND_C_MKFIFO; 2574 } else if (S_ISSOCK(mode)) { 2575 cmd = BTRFS_SEND_C_MKSOCK; 2576 } else { 2577 btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o", 2578 (int)(mode & S_IFMT)); 2579 ret = -ENOTSUPP; 2580 goto out; 2581 } 2582 2583 ret = begin_cmd(sctx, cmd); 2584 if (ret < 0) 2585 goto out; 2586 2587 ret = gen_unique_name(sctx, ino, gen, p); 2588 if (ret < 0) 2589 goto out; 2590 2591 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2592 TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino); 2593 2594 if (S_ISLNK(mode)) { 2595 fs_path_reset(p); 2596 ret = read_symlink(sctx->send_root, ino, p); 2597 if (ret < 0) 2598 goto out; 2599 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); 2600 } else if (S_ISCHR(mode) || S_ISBLK(mode) || 2601 S_ISFIFO(mode) || S_ISSOCK(mode)) { 2602 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev)); 2603 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode); 2604 } 2605 2606 ret = send_cmd(sctx); 2607 if (ret < 0) 2608 goto out; 2609 2610 2611tlv_put_failure: 2612out: 2613 fs_path_free(p); 2614 return ret; 2615} 2616 2617/* 2618 * We need some special handling for inodes that get processed before the parent 2619 * directory got created. See process_recorded_refs for details. 2620 * This function does the check if we already created the dir out of order. 
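 *
 * Example (hypothetical inode numbers): a file with ino 260 lives inside
 * a new directory with ino 261. When ino 260 is processed first, the
 * directory has to be created out of order; when ino 261 is processed
 * later, did_create_dir() returns 1 and the MKDIR command is skipped.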
2621 */ 2622static int did_create_dir(struct send_ctx *sctx, u64 dir) 2623{ 2624 int ret = 0; 2625 struct btrfs_path *path = NULL; 2626 struct btrfs_key key; 2627 struct btrfs_key found_key; 2628 struct btrfs_key di_key; 2629 struct extent_buffer *eb; 2630 struct btrfs_dir_item *di; 2631 int slot; 2632 2633 path = alloc_path_for_send(); 2634 if (!path) { 2635 ret = -ENOMEM; 2636 goto out; 2637 } 2638 2639 key.objectid = dir; 2640 key.type = BTRFS_DIR_INDEX_KEY; 2641 key.offset = 0; 2642 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2643 if (ret < 0) 2644 goto out; 2645 2646 while (1) { 2647 eb = path->nodes[0]; 2648 slot = path->slots[0]; 2649 if (slot >= btrfs_header_nritems(eb)) { 2650 ret = btrfs_next_leaf(sctx->send_root, path); 2651 if (ret < 0) { 2652 goto out; 2653 } else if (ret > 0) { 2654 ret = 0; 2655 break; 2656 } 2657 continue; 2658 } 2659 2660 btrfs_item_key_to_cpu(eb, &found_key, slot); 2661 if (found_key.objectid != key.objectid || 2662 found_key.type != key.type) { 2663 ret = 0; 2664 goto out; 2665 } 2666 2667 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 2668 btrfs_dir_item_key_to_cpu(eb, di, &di_key); 2669 2670 if (di_key.type != BTRFS_ROOT_ITEM_KEY && 2671 di_key.objectid < sctx->send_progress) { 2672 ret = 1; 2673 goto out; 2674 } 2675 2676 path->slots[0]++; 2677 } 2678 2679out: 2680 btrfs_free_path(path); 2681 return ret; 2682} 2683 2684/* 2685 * Only creates the inode if it is: 2686 * 1. Not a directory 2687 * 2. Or a directory which was not created already due to out of order 2688 * directories. See did_create_dir and process_recorded_refs for details. 2689 */ 2690static int send_create_inode_if_needed(struct send_ctx *sctx) 2691{ 2692 int ret; 2693 2694 if (S_ISDIR(sctx->cur_inode_mode)) { 2695 ret = did_create_dir(sctx, sctx->cur_ino); 2696 if (ret < 0) 2697 goto out; 2698 if (ret) { 2699 ret = 0; 2700 goto out; 2701 } 2702 } 2703 2704 ret = send_create_inode(sctx, sctx->cur_ino); 2705 if (ret < 0) 2706 goto out; 2707 2708out: 2709 return ret; 2710} 2711 2712struct recorded_ref { 2713 struct list_head list; 2714 char *dir_path; 2715 char *name; 2716 struct fs_path *full_path; 2717 u64 dir; 2718 u64 dir_gen; 2719 int dir_path_len; 2720 int name_len; 2721}; 2722 2723/* 2724 * We need to process new refs before deleted refs, but compare_tree gives us 2725 * everything mixed. So we first record all refs and later process them. 2726 * This function is a helper to record one ref. 
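 *
 * Sketch of the split performed below (illustrative path): for a
 * full_path of "a/b/file", kbasename() points name at "file", so
 * name_len = 4, dir_path = "a/b" and dir_path_len = 3.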
2727 */ 2728static int __record_ref(struct list_head *head, u64 dir, 2729 u64 dir_gen, struct fs_path *path) 2730{ 2731 struct recorded_ref *ref; 2732 2733 ref = kmalloc(sizeof(*ref), GFP_NOFS); 2734 if (!ref) 2735 return -ENOMEM; 2736 2737 ref->dir = dir; 2738 ref->dir_gen = dir_gen; 2739 ref->full_path = path; 2740 2741 ref->name = (char *)kbasename(ref->full_path->start); 2742 ref->name_len = ref->full_path->end - ref->name; 2743 ref->dir_path = ref->full_path->start; 2744 if (ref->name == ref->full_path->start) 2745 ref->dir_path_len = 0; 2746 else 2747 ref->dir_path_len = ref->full_path->end - 2748 ref->full_path->start - 1 - ref->name_len; 2749 2750 list_add_tail(&ref->list, head); 2751 return 0; 2752} 2753 2754static int dup_ref(struct recorded_ref *ref, struct list_head *list) 2755{ 2756 struct recorded_ref *new; 2757 2758 new = kmalloc(sizeof(*ref), GFP_NOFS); 2759 if (!new) 2760 return -ENOMEM; 2761 2762 new->dir = ref->dir; 2763 new->dir_gen = ref->dir_gen; 2764 new->full_path = NULL; 2765 INIT_LIST_HEAD(&new->list); 2766 list_add_tail(&new->list, list); 2767 return 0; 2768} 2769 2770static void __free_recorded_refs(struct list_head *head) 2771{ 2772 struct recorded_ref *cur; 2773 2774 while (!list_empty(head)) { 2775 cur = list_entry(head->next, struct recorded_ref, list); 2776 fs_path_free(cur->full_path); 2777 list_del(&cur->list); 2778 kfree(cur); 2779 } 2780} 2781 2782static void free_recorded_refs(struct send_ctx *sctx) 2783{ 2784 __free_recorded_refs(&sctx->new_refs); 2785 __free_recorded_refs(&sctx->deleted_refs); 2786} 2787 2788/* 2789 * Renames/moves a file/dir to its orphan name. Used when the first 2790 * ref of an unprocessed inode gets overwritten and for all non empty 2791 * directories. 2792 */ 2793static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, 2794 struct fs_path *path) 2795{ 2796 int ret; 2797 struct fs_path *orphan; 2798 2799 orphan = fs_path_alloc(); 2800 if (!orphan) 2801 return -ENOMEM; 2802 2803 ret = gen_unique_name(sctx, ino, gen, orphan); 2804 if (ret < 0) 2805 goto out; 2806 2807 ret = send_rename(sctx, path, orphan); 2808 2809out: 2810 fs_path_free(orphan); 2811 return ret; 2812} 2813 2814static struct orphan_dir_info * 2815add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) 2816{ 2817 struct rb_node **p = &sctx->orphan_dirs.rb_node; 2818 struct rb_node *parent = NULL; 2819 struct orphan_dir_info *entry, *odi; 2820 2821 odi = kmalloc(sizeof(*odi), GFP_NOFS); 2822 if (!odi) 2823 return ERR_PTR(-ENOMEM); 2824 odi->ino = dir_ino; 2825 odi->gen = 0; 2826 2827 while (*p) { 2828 parent = *p; 2829 entry = rb_entry(parent, struct orphan_dir_info, node); 2830 if (dir_ino < entry->ino) { 2831 p = &(*p)->rb_left; 2832 } else if (dir_ino > entry->ino) { 2833 p = &(*p)->rb_right; 2834 } else { 2835 kfree(odi); 2836 return entry; 2837 } 2838 } 2839 2840 rb_link_node(&odi->node, parent, p); 2841 rb_insert_color(&odi->node, &sctx->orphan_dirs); 2842 return odi; 2843} 2844 2845static struct orphan_dir_info * 2846get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) 2847{ 2848 struct rb_node *n = sctx->orphan_dirs.rb_node; 2849 struct orphan_dir_info *entry; 2850 2851 while (n) { 2852 entry = rb_entry(n, struct orphan_dir_info, node); 2853 if (dir_ino < entry->ino) 2854 n = n->rb_left; 2855 else if (dir_ino > entry->ino) 2856 n = n->rb_right; 2857 else 2858 return entry; 2859 } 2860 return NULL; 2861} 2862 2863static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) 2864{ 2865 struct orphan_dir_info *odi = get_orphan_dir_info(sctx, 
dir_ino); 2866 2867 return odi != NULL; 2868} 2869 2870static void free_orphan_dir_info(struct send_ctx *sctx, 2871 struct orphan_dir_info *odi) 2872{ 2873 if (!odi) 2874 return; 2875 rb_erase(&odi->node, &sctx->orphan_dirs); 2876 kfree(odi); 2877} 2878 2879/* 2880 * Returns 1 if a directory can be removed at this point in time. 2881 * We check this by iterating all dir items and checking if the inode behind 2882 * the dir item was already processed. 2883 */ 2884static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, 2885 u64 send_progress) 2886{ 2887 int ret = 0; 2888 struct btrfs_root *root = sctx->parent_root; 2889 struct btrfs_path *path; 2890 struct btrfs_key key; 2891 struct btrfs_key found_key; 2892 struct btrfs_key loc; 2893 struct btrfs_dir_item *di; 2894 2895 /* 2896 * Don't try to rmdir the top/root subvolume dir. 2897 */ 2898 if (dir == BTRFS_FIRST_FREE_OBJECTID) 2899 return 0; 2900 2901 path = alloc_path_for_send(); 2902 if (!path) 2903 return -ENOMEM; 2904 2905 key.objectid = dir; 2906 key.type = BTRFS_DIR_INDEX_KEY; 2907 key.offset = 0; 2908 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2909 if (ret < 0) 2910 goto out; 2911 2912 while (1) { 2913 struct waiting_dir_move *dm; 2914 2915 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 2916 ret = btrfs_next_leaf(root, path); 2917 if (ret < 0) 2918 goto out; 2919 else if (ret > 0) 2920 break; 2921 continue; 2922 } 2923 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2924 path->slots[0]); 2925 if (found_key.objectid != key.objectid || 2926 found_key.type != key.type) 2927 break; 2928 2929 di = btrfs_item_ptr(path->nodes[0], path->slots[0], 2930 struct btrfs_dir_item); 2931 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); 2932 2933 dm = get_waiting_dir_move(sctx, loc.objectid); 2934 if (dm) { 2935 struct orphan_dir_info *odi; 2936 2937 odi = add_orphan_dir_info(sctx, dir); 2938 if (IS_ERR(odi)) { 2939 ret = PTR_ERR(odi); 2940 goto out; 2941 } 2942 odi->gen = dir_gen; 2943 dm->rmdir_ino = dir; 2944 ret = 0; 2945 goto out; 2946 } 2947 2948 if (loc.objectid > send_progress) { 2949 ret = 0; 2950 goto out; 2951 } 2952 2953 path->slots[0]++; 2954 } 2955 2956 ret = 1; 2957 2958out: 2959 btrfs_free_path(path); 2960 return ret; 2961} 2962 2963static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) 2964{ 2965 struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino); 2966 2967 return entry != NULL; 2968} 2969 2970static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized) 2971{ 2972 struct rb_node **p = &sctx->waiting_dir_moves.rb_node; 2973 struct rb_node *parent = NULL; 2974 struct waiting_dir_move *entry, *dm; 2975 2976 dm = kmalloc(sizeof(*dm), GFP_NOFS); 2977 if (!dm) 2978 return -ENOMEM; 2979 dm->ino = ino; 2980 dm->rmdir_ino = 0; 2981 dm->orphanized = orphanized; 2982 2983 while (*p) { 2984 parent = *p; 2985 entry = rb_entry(parent, struct waiting_dir_move, node); 2986 if (ino < entry->ino) { 2987 p = &(*p)->rb_left; 2988 } else if (ino > entry->ino) { 2989 p = &(*p)->rb_right; 2990 } else { 2991 kfree(dm); 2992 return -EEXIST; 2993 } 2994 } 2995 2996 rb_link_node(&dm->node, parent, p); 2997 rb_insert_color(&dm->node, &sctx->waiting_dir_moves); 2998 return 0; 2999} 3000 3001static struct waiting_dir_move * 3002get_waiting_dir_move(struct send_ctx *sctx, u64 ino) 3003{ 3004 struct rb_node *n = sctx->waiting_dir_moves.rb_node; 3005 struct waiting_dir_move *entry; 3006 3007 while (n) { 3008 entry = rb_entry(n, struct waiting_dir_move, node); 3009 if (ino < 
entry->ino) 3010 n = n->rb_left; 3011 else if (ino > entry->ino) 3012 n = n->rb_right; 3013 else 3014 return entry; 3015 } 3016 return NULL; 3017} 3018 3019static void free_waiting_dir_move(struct send_ctx *sctx, 3020 struct waiting_dir_move *dm) 3021{ 3022 if (!dm) 3023 return; 3024 rb_erase(&dm->node, &sctx->waiting_dir_moves); 3025 kfree(dm); 3026} 3027 3028static int add_pending_dir_move(struct send_ctx *sctx, 3029 u64 ino, 3030 u64 ino_gen, 3031 u64 parent_ino, 3032 struct list_head *new_refs, 3033 struct list_head *deleted_refs, 3034 const bool is_orphan) 3035{ 3036 struct rb_node **p = &sctx->pending_dir_moves.rb_node; 3037 struct rb_node *parent = NULL; 3038 struct pending_dir_move *entry = NULL, *pm; 3039 struct recorded_ref *cur; 3040 int exists = 0; 3041 int ret; 3042 3043 pm = kmalloc(sizeof(*pm), GFP_NOFS); 3044 if (!pm) 3045 return -ENOMEM; 3046 pm->parent_ino = parent_ino; 3047 pm->ino = ino; 3048 pm->gen = ino_gen; 3049 pm->is_orphan = is_orphan; 3050 INIT_LIST_HEAD(&pm->list); 3051 INIT_LIST_HEAD(&pm->update_refs); 3052 RB_CLEAR_NODE(&pm->node); 3053 3054 while (*p) { 3055 parent = *p; 3056 entry = rb_entry(parent, struct pending_dir_move, node); 3057 if (parent_ino < entry->parent_ino) { 3058 p = &(*p)->rb_left; 3059 } else if (parent_ino > entry->parent_ino) { 3060 p = &(*p)->rb_right; 3061 } else { 3062 exists = 1; 3063 break; 3064 } 3065 } 3066 3067 list_for_each_entry(cur, deleted_refs, list) { 3068 ret = dup_ref(cur, &pm->update_refs); 3069 if (ret < 0) 3070 goto out; 3071 } 3072 list_for_each_entry(cur, new_refs, list) { 3073 ret = dup_ref(cur, &pm->update_refs); 3074 if (ret < 0) 3075 goto out; 3076 } 3077 3078 ret = add_waiting_dir_move(sctx, pm->ino, is_orphan); 3079 if (ret) 3080 goto out; 3081 3082 if (exists) { 3083 list_add_tail(&pm->list, &entry->list); 3084 } else { 3085 rb_link_node(&pm->node, parent, p); 3086 rb_insert_color(&pm->node, &sctx->pending_dir_moves); 3087 } 3088 ret = 0; 3089out: 3090 if (ret) { 3091 __free_recorded_refs(&pm->update_refs); 3092 kfree(pm); 3093 } 3094 return ret; 3095} 3096 3097static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, 3098 u64 parent_ino) 3099{ 3100 struct rb_node *n = sctx->pending_dir_moves.rb_node; 3101 struct pending_dir_move *entry; 3102 3103 while (n) { 3104 entry = rb_entry(n, struct pending_dir_move, node); 3105 if (parent_ino < entry->parent_ino) 3106 n = n->rb_left; 3107 else if (parent_ino > entry->parent_ino) 3108 n = n->rb_right; 3109 else 3110 return entry; 3111 } 3112 return NULL; 3113} 3114 3115static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) 3116{ 3117 struct fs_path *from_path = NULL; 3118 struct fs_path *to_path = NULL; 3119 struct fs_path *name = NULL; 3120 u64 orig_progress = sctx->send_progress; 3121 struct recorded_ref *cur; 3122 u64 parent_ino, parent_gen; 3123 struct waiting_dir_move *dm = NULL; 3124 u64 rmdir_ino = 0; 3125 int ret; 3126 3127 name = fs_path_alloc(); 3128 from_path = fs_path_alloc(); 3129 if (!name || !from_path) { 3130 ret = -ENOMEM; 3131 goto out; 3132 } 3133 3134 dm = get_waiting_dir_move(sctx, pm->ino); 3135 ASSERT(dm); 3136 rmdir_ino = dm->rmdir_ino; 3137 free_waiting_dir_move(sctx, dm); 3138 3139 if (pm->is_orphan) { 3140 ret = gen_unique_name(sctx, pm->ino, 3141 pm->gen, from_path); 3142 } else { 3143 ret = get_first_ref(sctx->parent_root, pm->ino, 3144 &parent_ino, &parent_gen, name); 3145 if (ret < 0) 3146 goto out; 3147 ret = get_cur_path(sctx, parent_ino, parent_gen, 3148 from_path); 3149 if (ret < 0) 3150 goto 
out; 3151 ret = fs_path_add_path(from_path, name); 3152 } 3153 if (ret < 0) 3154 goto out; 3155 3156 sctx->send_progress = sctx->cur_ino + 1; 3157 fs_path_reset(name); 3158 to_path = name; 3159 name = NULL; 3160 ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); 3161 if (ret < 0) 3162 goto out; 3163 3164 ret = send_rename(sctx, from_path, to_path); 3165 if (ret < 0) 3166 goto out; 3167 3168 if (rmdir_ino) { 3169 struct orphan_dir_info *odi; 3170 3171 odi = get_orphan_dir_info(sctx, rmdir_ino); 3172 if (!odi) { 3173 /* already deleted */ 3174 goto finish; 3175 } 3176 ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); 3177 if (ret < 0) 3178 goto out; 3179 if (!ret) 3180 goto finish; 3181 3182 name = fs_path_alloc(); 3183 if (!name) { 3184 ret = -ENOMEM; 3185 goto out; 3186 } 3187 ret = get_cur_path(sctx, rmdir_ino, odi->gen, name); 3188 if (ret < 0) 3189 goto out; 3190 ret = send_rmdir(sctx, name); 3191 if (ret < 0) 3192 goto out; 3193 free_orphan_dir_info(sctx, odi); 3194 } 3195 3196finish: 3197 ret = send_utimes(sctx, pm->ino, pm->gen); 3198 if (ret < 0) 3199 goto out; 3200 3201 /* 3202 * After rename/move, need to update the utimes of both new parent(s) 3203 * and old parent(s). 3204 */ 3205 list_for_each_entry(cur, &pm->update_refs, list) { 3206 if (cur->dir == rmdir_ino) 3207 continue; 3208 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3209 if (ret < 0) 3210 goto out; 3211 } 3212 3213out: 3214 fs_path_free(name); 3215 fs_path_free(from_path); 3216 fs_path_free(to_path); 3217 sctx->send_progress = orig_progress; 3218 3219 return ret; 3220} 3221 3222static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m) 3223{ 3224 if (!list_empty(&m->list)) 3225 list_del(&m->list); 3226 if (!RB_EMPTY_NODE(&m->node)) 3227 rb_erase(&m->node, &sctx->pending_dir_moves); 3228 __free_recorded_refs(&m->update_refs); 3229 kfree(m); 3230} 3231 3232static void tail_append_pending_moves(struct pending_dir_move *moves, 3233 struct list_head *stack) 3234{ 3235 if (list_empty(&moves->list)) { 3236 list_add_tail(&moves->list, stack); 3237 } else { 3238 LIST_HEAD(list); 3239 list_splice_init(&moves->list, &list); 3240 list_add_tail(&moves->list, stack); 3241 list_splice_tail(&list, stack); 3242 } 3243} 3244 3245static int apply_children_dir_moves(struct send_ctx *sctx) 3246{ 3247 struct pending_dir_move *pm; 3248 struct list_head stack; 3249 u64 parent_ino = sctx->cur_ino; 3250 int ret = 0; 3251 3252 pm = get_pending_dir_moves(sctx, parent_ino); 3253 if (!pm) 3254 return 0; 3255 3256 INIT_LIST_HEAD(&stack); 3257 tail_append_pending_moves(pm, &stack); 3258 3259 while (!list_empty(&stack)) { 3260 pm = list_first_entry(&stack, struct pending_dir_move, list); 3261 parent_ino = pm->ino; 3262 ret = apply_dir_move(sctx, pm); 3263 free_pending_move(sctx, pm); 3264 if (ret) 3265 goto out; 3266 pm = get_pending_dir_moves(sctx, parent_ino); 3267 if (pm) 3268 tail_append_pending_moves(pm, &stack); 3269 } 3270 return 0; 3271 3272out: 3273 while (!list_empty(&stack)) { 3274 pm = list_first_entry(&stack, struct pending_dir_move, list); 3275 free_pending_move(sctx, pm); 3276 } 3277 return ret; 3278} 3279 3280/* 3281 * We might need to delay a directory rename even when no ancestor directory 3282 * (in the send root) with a higher inode number than ours (sctx->cur_ino) was 3283 * renamed. 
This happens when we rename a directory to the old name (the name 3284 * in the parent root) of some other unrelated directory that got its rename 3285 * delayed due to some ancestor with higher number that got renamed. 3286 * 3287 * Example: 3288 * 3289 * Parent snapshot: 3290 * . (ino 256) 3291 * |---- a/ (ino 257) 3292 * | |---- file (ino 260) 3293 * | 3294 * |---- b/ (ino 258) 3295 * |---- c/ (ino 259) 3296 * 3297 * Send snapshot: 3298 * . (ino 256) 3299 * |---- a/ (ino 258) 3300 * |---- x/ (ino 259) 3301 * |---- y/ (ino 257) 3302 * |----- file (ino 260) 3303 * 3304 * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257 3305 * from 'a' to 'x/y' happening first, which in turn depends on the rename of 3306 * inode 259 from 'c' to 'x'. So the order of rename commands the send stream 3307 * must issue is: 3308 * 3309 * 1 - rename 259 from 'c' to 'x' 3310 * 2 - rename 257 from 'a' to 'x/y' 3311 * 3 - rename 258 from 'b' to 'a' 3312 * 3313 * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can 3314 * be done right away and < 0 on error. 3315 */ 3316static int wait_for_dest_dir_move(struct send_ctx *sctx, 3317 struct recorded_ref *parent_ref, 3318 const bool is_orphan) 3319{ 3320 struct btrfs_path *path; 3321 struct btrfs_key key; 3322 struct btrfs_key di_key; 3323 struct btrfs_dir_item *di; 3324 u64 left_gen; 3325 u64 right_gen; 3326 int ret = 0; 3327 3328 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) 3329 return 0; 3330 3331 path = alloc_path_for_send(); 3332 if (!path) 3333 return -ENOMEM; 3334 3335 key.objectid = parent_ref->dir; 3336 key.type = BTRFS_DIR_ITEM_KEY; 3337 key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len); 3338 3339 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); 3340 if (ret < 0) { 3341 goto out; 3342 } else if (ret > 0) { 3343 ret = 0; 3344 goto out; 3345 } 3346 3347 di = btrfs_match_dir_item_name(sctx->parent_root, path, 3348 parent_ref->name, parent_ref->name_len); 3349 if (!di) { 3350 ret = 0; 3351 goto out; 3352 } 3353 /* 3354 * di_key.objectid has the number of the inode that has a dentry in the 3355 * parent directory with the same name that sctx->cur_ino is being 3356 * renamed to. We need to check if that inode is in the send root as 3357 * well and if it is currently marked as an inode with a pending rename, 3358 * if it is, we need to delay the rename of sctx->cur_ino as well, so 3359 * that it happens after that other inode is renamed. 3360 */ 3361 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key); 3362 if (di_key.type != BTRFS_INODE_ITEM_KEY) { 3363 ret = 0; 3364 goto out; 3365 } 3366 3367 ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL, 3368 &left_gen, NULL, NULL, NULL, NULL); 3369 if (ret < 0) 3370 goto out; 3371 ret = get_inode_info(sctx->send_root, di_key.objectid, NULL, 3372 &right_gen, NULL, NULL, NULL, NULL); 3373 if (ret < 0) { 3374 if (ret == -ENOENT) 3375 ret = 0; 3376 goto out; 3377 } 3378 3379 /* Different inode, no need to delay the rename of sctx->cur_ino */ 3380 if (right_gen != left_gen) { 3381 ret = 0; 3382 goto out; 3383 } 3384 3385 if (is_waiting_for_move(sctx, di_key.objectid)) { 3386 ret = add_pending_dir_move(sctx, 3387 sctx->cur_ino, 3388 sctx->cur_inode_gen, 3389 di_key.objectid, 3390 &sctx->new_refs, 3391 &sctx->deleted_refs, 3392 is_orphan); 3393 if (!ret) 3394 ret = 1; 3395 } 3396out: 3397 btrfs_free_path(path); 3398 return ret; 3399} 3400 3401/* 3402 * Check if ino ino1 is an ancestor of inode ino2 in the given root. 
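 * For example (hypothetical layout), if ino2 is reachable as /a/b/c and
 * ino1 is the inode of 'a' with a matching generation, this returns 1.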
3403 * Return 1 if true, 0 if false and < 0 on error.
3404 */
3405static int is_ancestor(struct btrfs_root *root,
3406		       const u64 ino1,
3407		       const u64 ino1_gen,
3408		       const u64 ino2,
3409		       struct fs_path *fs_path)
3410{
3411	u64 ino = ino2;
3412
3413	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
3414		int ret;
3415		u64 parent;
3416		u64 parent_gen;
3417
3418		fs_path_reset(fs_path);
3419		ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
3420		if (ret < 0) {
3421			if (ret == -ENOENT && ino == ino2)
3422				ret = 0;
3423			return ret;
3424		}
3425		if (parent == ino1)
3426			return parent_gen == ino1_gen ? 1 : 0;
3427		ino = parent;
3428	}
3429	return 0;
3430}
3431
3432static int wait_for_parent_move(struct send_ctx *sctx,
3433				struct recorded_ref *parent_ref,
3434				const bool is_orphan)
3435{
3436	int ret = 0;
3437	u64 ino = parent_ref->dir;
3438	u64 parent_ino_before, parent_ino_after;
3439	struct fs_path *path_before = NULL;
3440	struct fs_path *path_after = NULL;
3441	int len1, len2;
3442
3443	path_after = fs_path_alloc();
3444	path_before = fs_path_alloc();
3445	if (!path_after || !path_before) {
3446		ret = -ENOMEM;
3447		goto out;
3448	}
3449
3450	/*
3451	 * Our current directory inode may not yet be renamed/moved because some
3452	 * ancestor (immediate or not) has to be renamed/moved first. So find out
3453	 * if such an ancestor exists and make sure our own rename/move happens
3454	 * after that ancestor is processed to avoid path build infinite loops
3455	 * (done at get_cur_path()).
3456	 */
3457	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
3458		if (is_waiting_for_move(sctx, ino)) {
3459			/*
3460			 * If the current inode is an ancestor of ino in the
3461			 * parent root, we need to delay the rename of the
3462			 * current inode, otherwise don't delay the rename
3463			 * because we can end up with a circular dependency
3464			 * of renames, resulting in some directories never
3465			 * getting the respective rename operations issued in
3466			 * the send stream or getting into infinite path build
3467			 * loops.
3468			 */
3469			ret = is_ancestor(sctx->parent_root,
3470					  sctx->cur_ino, sctx->cur_inode_gen,
3471					  ino, path_before);
3472			break;
3473		}
3474
3475		fs_path_reset(path_before);
3476		fs_path_reset(path_after);
3477
3478		ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
3479				    NULL, path_after);
3480		if (ret < 0)
3481			goto out;
3482		ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
3483				    NULL, path_before);
3484		if (ret < 0 && ret != -ENOENT) {
3485			goto out;
3486		} else if (ret == -ENOENT) {
3487			ret = 0;
3488			break;
3489		}
3490
3491		len1 = fs_path_len(path_before);
3492		len2 = fs_path_len(path_after);
3493		if (ino > sctx->cur_ino &&
3494		    (parent_ino_before != parent_ino_after || len1 != len2 ||
3495		     memcmp(path_before->start, path_after->start, len1))) {
3496			ret = 1;
3497			break;
3498		}
3499		ino = parent_ino_after;
3500	}
3501
3502out:
3503	fs_path_free(path_before);
3504	fs_path_free(path_after);
3505
3506	if (ret == 1) {
3507		ret = add_pending_dir_move(sctx,
3508					   sctx->cur_ino,
3509					   sctx->cur_inode_gen,
3510					   ino,
3511					   &sctx->new_refs,
3512					   &sctx->deleted_refs,
3513					   is_orphan);
3514		if (!ret)
3515			ret = 1;
3516	}
3517
3518	return ret;
3519}
3520
3521/*
3522 * This does all the move/link/unlink/rmdir magic.
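 *
 * Rough order of operations (a summary of the code below):
 * 1. Pick valid_path: the orphan name for new or overwritten inodes,
 *    otherwise the current path of the first ref.
 * 2. For every new ref: create missing parent dirs out of order,
 *    orphanize or unlink overwritten refs, then rename/link the ref into
 *    place (possibly delayed via the pending_dir_moves machinery).
 * 3. For a deleted dir: rmdir it if possible, otherwise orphanize it.
 * 4. For a non-dir: unlink all deleted refs that were not overwritten.
 * 5. Finally, update utimes and retry rmdir for all affected parent dirs.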
3523 */
3524static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
3525{
3526	int ret = 0;
3527	struct recorded_ref *cur;
3528	struct recorded_ref *cur2;
3529	struct list_head check_dirs;
3530	struct fs_path *valid_path = NULL;
3531	u64 ow_inode = 0;
3532	u64 ow_gen;
3533	int did_overwrite = 0;
3534	int is_orphan = 0;
3535	u64 last_dir_ino_rm = 0;
3536	bool can_rename = true;
3537
3538verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3539
3540	/*
3541	 * This should never happen as the root dir always has the same ref
3542	 * which is always '..'
3543	 */
3544	BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
3545	INIT_LIST_HEAD(&check_dirs);
3546
3547	valid_path = fs_path_alloc();
3548	if (!valid_path) {
3549		ret = -ENOMEM;
3550		goto out;
3551	}
3552
3553	/*
3554	 * First, check if the first ref of the current inode was overwritten
3555	 * before. If yes, we know that the current inode was already orphanized
3556	 * and thus use the orphan name. If not, we can use get_cur_path to
3557	 * get the path of the first ref as it would look while receiving at
3558	 * this point in time.
3559	 * New inodes are always orphans at the beginning, so force the use of
3560	 * the orphan name in this case.
3561	 * The first ref is stored in valid_path and will be updated if it
3562	 * gets moved around.
3563	 */
3564	if (!sctx->cur_inode_new) {
3565		ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
3566				sctx->cur_inode_gen);
3567		if (ret < 0)
3568			goto out;
3569		if (ret)
3570			did_overwrite = 1;
3571	}
3572	if (sctx->cur_inode_new || did_overwrite) {
3573		ret = gen_unique_name(sctx, sctx->cur_ino,
3574				sctx->cur_inode_gen, valid_path);
3575		if (ret < 0)
3576			goto out;
3577		is_orphan = 1;
3578	} else {
3579		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3580				valid_path);
3581		if (ret < 0)
3582			goto out;
3583	}
3584
3585	list_for_each_entry(cur, &sctx->new_refs, list) {
3586		/*
3587		 * We may have refs where the parent directory does not exist
3588		 * yet. This happens if the parent directory's inum is higher
3589		 * than the current inum. To handle this case, we create the
3590		 * parent directory out of order. But we need to check if this
3591		 * did already happen before due to other refs in the same dir.
3592		 */
3593		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
3594		if (ret < 0)
3595			goto out;
3596		if (ret == inode_state_will_create) {
3597			ret = 0;
3598			/*
3599			 * First check if any of the current inode's refs did
3600			 * already create the dir.
3601			 */
3602			list_for_each_entry(cur2, &sctx->new_refs, list) {
3603				if (cur == cur2)
3604					break;
3605				if (cur2->dir == cur->dir) {
3606					ret = 1;
3607					break;
3608				}
3609			}
3610
3611			/*
3612			 * If that did not happen, check if a previous inode
3613			 * did already create the dir.
3614			 */
3615			if (!ret)
3616				ret = did_create_dir(sctx, cur->dir);
3617			if (ret < 0)
3618				goto out;
3619			if (!ret) {
3620				ret = send_create_inode(sctx, cur->dir);
3621				if (ret < 0)
3622					goto out;
3623			}
3624		}
3625
3626		/*
3627		 * Check if this new ref would overwrite the first ref of
3628		 * another unprocessed inode. If yes, orphanize the
3629		 * overwritten inode. If we find an overwritten ref that is
3630		 * not the first ref, simply unlink it.
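 *
 * Example (hypothetical): if the new ref "a/x" of the current inode is
 * also the first ref of the unprocessed inode 262, inode 262 is renamed
 * to its orphan name now (gen_unique_name() produces names of the form
 * "o<ino>-<gen>-<idx>") and is moved back to its final name once it is
 * processed.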
3631 */ 3632 ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3633 cur->name, cur->name_len, 3634 &ow_inode, &ow_gen); 3635 if (ret < 0) 3636 goto out; 3637 if (ret) { 3638 ret = is_first_ref(sctx->parent_root, 3639 ow_inode, cur->dir, cur->name, 3640 cur->name_len); 3641 if (ret < 0) 3642 goto out; 3643 if (ret) { 3644 struct name_cache_entry *nce; 3645 3646 ret = orphanize_inode(sctx, ow_inode, ow_gen, 3647 cur->full_path); 3648 if (ret < 0) 3649 goto out; 3650 /* 3651 * Make sure we clear our orphanized inode's 3652 * name from the name cache. This is because the 3653 * inode ow_inode might be an ancestor of some 3654 * other inode that will be orphanized as well 3655 * later and has an inode number greater than 3656 * sctx->send_progress. We need to prevent 3657 * future name lookups from using the old name 3658 * and get instead the orphan name. 3659 */ 3660 nce = name_cache_search(sctx, ow_inode, ow_gen); 3661 if (nce) { 3662 name_cache_delete(sctx, nce); 3663 kfree(nce); 3664 } 3665 } else { 3666 ret = send_unlink(sctx, cur->full_path); 3667 if (ret < 0) 3668 goto out; 3669 } 3670 } 3671 3672 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) { 3673 ret = wait_for_dest_dir_move(sctx, cur, is_orphan); 3674 if (ret < 0) 3675 goto out; 3676 if (ret == 1) { 3677 can_rename = false; 3678 *pending_move = 1; 3679 } 3680 } 3681 3682 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root && 3683 can_rename) { 3684 ret = wait_for_parent_move(sctx, cur, is_orphan); 3685 if (ret < 0) 3686 goto out; 3687 if (ret == 1) { 3688 can_rename = false; 3689 *pending_move = 1; 3690 } 3691 } 3692 3693 /* 3694 * link/move the ref to the new place. If we have an orphan 3695 * inode, move it and update valid_path. If not, link or move 3696 * it depending on the inode mode. 3697 */ 3698 if (is_orphan && can_rename) { 3699 ret = send_rename(sctx, valid_path, cur->full_path); 3700 if (ret < 0) 3701 goto out; 3702 is_orphan = 0; 3703 ret = fs_path_copy(valid_path, cur->full_path); 3704 if (ret < 0) 3705 goto out; 3706 } else if (can_rename) { 3707 if (S_ISDIR(sctx->cur_inode_mode)) { 3708 /* 3709 * Dirs can't be linked, so move it. For moved 3710 * dirs, we always have one new and one deleted 3711 * ref. The deleted ref is ignored later. 3712 */ 3713 ret = send_rename(sctx, valid_path, 3714 cur->full_path); 3715 if (!ret) 3716 ret = fs_path_copy(valid_path, 3717 cur->full_path); 3718 if (ret < 0) 3719 goto out; 3720 } else { 3721 ret = send_link(sctx, cur->full_path, 3722 valid_path); 3723 if (ret < 0) 3724 goto out; 3725 } 3726 } 3727 ret = dup_ref(cur, &check_dirs); 3728 if (ret < 0) 3729 goto out; 3730 } 3731 3732 if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) { 3733 /* 3734 * Check if we can already rmdir the directory. If not, 3735 * orphanize it. For every dir item inside that gets deleted 3736 * later, we do this check again and rmdir it then if possible. 3737 * See the use of check_dirs for more details. 
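 *
 * Example (hypothetical): a deleted dir that still contains entries
 * waiting for a move/rename cannot be rmdir'ed yet; it is orphanized
 * instead and the rmdir is retried once its remaining entries have been
 * processed.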
3738 */ 3739 ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen, 3740 sctx->cur_ino); 3741 if (ret < 0) 3742 goto out; 3743 if (ret) { 3744 ret = send_rmdir(sctx, valid_path); 3745 if (ret < 0) 3746 goto out; 3747 } else if (!is_orphan) { 3748 ret = orphanize_inode(sctx, sctx->cur_ino, 3749 sctx->cur_inode_gen, valid_path); 3750 if (ret < 0) 3751 goto out; 3752 is_orphan = 1; 3753 } 3754 3755 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3756 ret = dup_ref(cur, &check_dirs); 3757 if (ret < 0) 3758 goto out; 3759 } 3760 } else if (S_ISDIR(sctx->cur_inode_mode) && 3761 !list_empty(&sctx->deleted_refs)) { 3762 /* 3763 * We have a moved dir. Add the old parent to check_dirs 3764 */ 3765 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, 3766 list); 3767 ret = dup_ref(cur, &check_dirs); 3768 if (ret < 0) 3769 goto out; 3770 } else if (!S_ISDIR(sctx->cur_inode_mode)) { 3771 /* 3772 * We have a non dir inode. Go through all deleted refs and 3773 * unlink them if they were not already overwritten by other 3774 * inodes. 3775 */ 3776 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3777 ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3778 sctx->cur_ino, sctx->cur_inode_gen, 3779 cur->name, cur->name_len); 3780 if (ret < 0) 3781 goto out; 3782 if (!ret) { 3783 ret = send_unlink(sctx, cur->full_path); 3784 if (ret < 0) 3785 goto out; 3786 } 3787 ret = dup_ref(cur, &check_dirs); 3788 if (ret < 0) 3789 goto out; 3790 } 3791 /* 3792 * If the inode is still orphan, unlink the orphan. This may 3793 * happen when a previous inode did overwrite the first ref 3794 * of this inode and no new refs were added for the current 3795 * inode. Unlinking does not mean that the inode is deleted in 3796 * all cases. There may still be links to this inode in other 3797 * places. 3798 */ 3799 if (is_orphan) { 3800 ret = send_unlink(sctx, valid_path); 3801 if (ret < 0) 3802 goto out; 3803 } 3804 } 3805 3806 /* 3807 * We did collect all parent dirs where cur_inode was once located. We 3808 * now go through all these dirs and check if they are pending for 3809 * deletion and if it's finally possible to perform the rmdir now. 3810 * We also update the inode stats of the parent dirs here. 3811 */ 3812 list_for_each_entry(cur, &check_dirs, list) { 3813 /* 3814 * In case we had refs into dirs that were not processed yet, 3815 * we don't need to do the utime and rmdir logic for these dirs. 3816 * The dir will be processed later. 
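 *
 * Example (hypothetical inode numbers): if cur->dir is 270 while
 * sctx->cur_ino is 260, dir 270 was not processed yet, so its
 * utimes/rmdir handling happens when inode 270 itself is processed.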
3817 */ 3818 if (cur->dir > sctx->cur_ino) 3819 continue; 3820 3821 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); 3822 if (ret < 0) 3823 goto out; 3824 3825 if (ret == inode_state_did_create || 3826 ret == inode_state_no_change) { 3827 /* TODO delayed utimes */ 3828 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3829 if (ret < 0) 3830 goto out; 3831 } else if (ret == inode_state_did_delete && 3832 cur->dir != last_dir_ino_rm) { 3833 ret = can_rmdir(sctx, cur->dir, cur->dir_gen, 3834 sctx->cur_ino); 3835 if (ret < 0) 3836 goto out; 3837 if (ret) { 3838 ret = get_cur_path(sctx, cur->dir, 3839 cur->dir_gen, valid_path); 3840 if (ret < 0) 3841 goto out; 3842 ret = send_rmdir(sctx, valid_path); 3843 if (ret < 0) 3844 goto out; 3845 last_dir_ino_rm = cur->dir; 3846 } 3847 } 3848 } 3849 3850 ret = 0; 3851 3852out: 3853 __free_recorded_refs(&check_dirs); 3854 free_recorded_refs(sctx); 3855 fs_path_free(valid_path); 3856 return ret; 3857} 3858 3859static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, 3860 struct fs_path *name, void *ctx, struct list_head *refs) 3861{ 3862 int ret = 0; 3863 struct send_ctx *sctx = ctx; 3864 struct fs_path *p; 3865 u64 gen; 3866 3867 p = fs_path_alloc(); 3868 if (!p) 3869 return -ENOMEM; 3870 3871 ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL, 3872 NULL, NULL); 3873 if (ret < 0) 3874 goto out; 3875 3876 ret = get_cur_path(sctx, dir, gen, p); 3877 if (ret < 0) 3878 goto out; 3879 ret = fs_path_add_path(p, name); 3880 if (ret < 0) 3881 goto out; 3882 3883 ret = __record_ref(refs, dir, gen, p); 3884 3885out: 3886 if (ret) 3887 fs_path_free(p); 3888 return ret; 3889} 3890 3891static int __record_new_ref(int num, u64 dir, int index, 3892 struct fs_path *name, 3893 void *ctx) 3894{ 3895 struct send_ctx *sctx = ctx; 3896 return record_ref(sctx->send_root, num, dir, index, name, 3897 ctx, &sctx->new_refs); 3898} 3899 3900 3901static int __record_deleted_ref(int num, u64 dir, int index, 3902 struct fs_path *name, 3903 void *ctx) 3904{ 3905 struct send_ctx *sctx = ctx; 3906 return record_ref(sctx->parent_root, num, dir, index, name, 3907 ctx, &sctx->deleted_refs); 3908} 3909 3910static int record_new_ref(struct send_ctx *sctx) 3911{ 3912 int ret; 3913 3914 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 3915 sctx->cmp_key, 0, __record_new_ref, sctx); 3916 if (ret < 0) 3917 goto out; 3918 ret = 0; 3919 3920out: 3921 return ret; 3922} 3923 3924static int record_deleted_ref(struct send_ctx *sctx) 3925{ 3926 int ret; 3927 3928 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 3929 sctx->cmp_key, 0, __record_deleted_ref, sctx); 3930 if (ret < 0) 3931 goto out; 3932 ret = 0; 3933 3934out: 3935 return ret; 3936} 3937 3938struct find_ref_ctx { 3939 u64 dir; 3940 u64 dir_gen; 3941 struct btrfs_root *root; 3942 struct fs_path *name; 3943 int found_idx; 3944}; 3945 3946static int __find_iref(int num, u64 dir, int index, 3947 struct fs_path *name, 3948 void *ctx_) 3949{ 3950 struct find_ref_ctx *ctx = ctx_; 3951 u64 dir_gen; 3952 int ret; 3953 3954 if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && 3955 strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { 3956 /* 3957 * To avoid doing extra lookups we'll only do this if everything 3958 * else matches. 
3959 */ 3960 ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL, 3961 NULL, NULL, NULL); 3962 if (ret) 3963 return ret; 3964 if (dir_gen != ctx->dir_gen) 3965 return 0; 3966 ctx->found_idx = num; 3967 return 1; 3968 } 3969 return 0; 3970} 3971 3972static int find_iref(struct btrfs_root *root, 3973 struct btrfs_path *path, 3974 struct btrfs_key *key, 3975 u64 dir, u64 dir_gen, struct fs_path *name) 3976{ 3977 int ret; 3978 struct find_ref_ctx ctx; 3979 3980 ctx.dir = dir; 3981 ctx.name = name; 3982 ctx.dir_gen = dir_gen; 3983 ctx.found_idx = -1; 3984 ctx.root = root; 3985 3986 ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); 3987 if (ret < 0) 3988 return ret; 3989 3990 if (ctx.found_idx == -1) 3991 return -ENOENT; 3992 3993 return ctx.found_idx; 3994} 3995 3996static int __record_changed_new_ref(int num, u64 dir, int index, 3997 struct fs_path *name, 3998 void *ctx) 3999{ 4000 u64 dir_gen; 4001 int ret; 4002 struct send_ctx *sctx = ctx; 4003 4004 ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL, 4005 NULL, NULL, NULL); 4006 if (ret) 4007 return ret; 4008 4009 ret = find_iref(sctx->parent_root, sctx->right_path, 4010 sctx->cmp_key, dir, dir_gen, name); 4011 if (ret == -ENOENT) 4012 ret = __record_new_ref(num, dir, index, name, sctx); 4013 else if (ret > 0) 4014 ret = 0; 4015 4016 return ret; 4017} 4018 4019static int __record_changed_deleted_ref(int num, u64 dir, int index, 4020 struct fs_path *name, 4021 void *ctx) 4022{ 4023 u64 dir_gen; 4024 int ret; 4025 struct send_ctx *sctx = ctx; 4026 4027 ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL, 4028 NULL, NULL, NULL); 4029 if (ret) 4030 return ret; 4031 4032 ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, 4033 dir, dir_gen, name); 4034 if (ret == -ENOENT) 4035 ret = __record_deleted_ref(num, dir, index, name, sctx); 4036 else if (ret > 0) 4037 ret = 0; 4038 4039 return ret; 4040} 4041 4042static int record_changed_ref(struct send_ctx *sctx) 4043{ 4044 int ret = 0; 4045 4046 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 4047 sctx->cmp_key, 0, __record_changed_new_ref, sctx); 4048 if (ret < 0) 4049 goto out; 4050 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 4051 sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); 4052 if (ret < 0) 4053 goto out; 4054 ret = 0; 4055 4056out: 4057 return ret; 4058} 4059 4060/* 4061 * Record and process all refs at once. Needed when an inode changes the 4062 * generation number, which means that it was deleted and recreated. 
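 *
 * Example (hypothetical): an inode number that was a file in the parent
 * snapshot but is a directory in the send snapshot has a new generation;
 * all refs from the parent snapshot are then recorded as deleted and all
 * refs from the send snapshot as new, and processed in one pass.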
4063 */
4064static int process_all_refs(struct send_ctx *sctx,
4065			    enum btrfs_compare_tree_result cmd)
4066{
4067	int ret;
4068	struct btrfs_root *root;
4069	struct btrfs_path *path;
4070	struct btrfs_key key;
4071	struct btrfs_key found_key;
4072	struct extent_buffer *eb;
4073	int slot;
4074	iterate_inode_ref_t cb;
4075	int pending_move = 0;
4076
4077	path = alloc_path_for_send();
4078	if (!path)
4079		return -ENOMEM;
4080
4081	if (cmd == BTRFS_COMPARE_TREE_NEW) {
4082		root = sctx->send_root;
4083		cb = __record_new_ref;
4084	} else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
4085		root = sctx->parent_root;
4086		cb = __record_deleted_ref;
4087	} else {
4088		btrfs_err(sctx->send_root->fs_info,
4089				"Wrong command %d in process_all_refs", cmd);
4090		ret = -EINVAL;
4091		goto out;
4092	}
4093
4094	key.objectid = sctx->cmp_key->objectid;
4095	key.type = BTRFS_INODE_REF_KEY;
4096	key.offset = 0;
4097	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4098	if (ret < 0)
4099		goto out;
4100
4101	while (1) {
4102		eb = path->nodes[0];
4103		slot = path->slots[0];
4104		if (slot >= btrfs_header_nritems(eb)) {
4105			ret = btrfs_next_leaf(root, path);
4106			if (ret < 0)
4107				goto out;
4108			else if (ret > 0)
4109				break;
4110			continue;
4111		}
4112
4113		btrfs_item_key_to_cpu(eb, &found_key, slot);
4114
4115		if (found_key.objectid != key.objectid ||
4116		    (found_key.type != BTRFS_INODE_REF_KEY &&
4117		     found_key.type != BTRFS_INODE_EXTREF_KEY))
4118			break;
4119
4120		ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
4121		if (ret < 0)
4122			goto out;
4123
4124		path->slots[0]++;
4125	}
4126	btrfs_release_path(path);
4127
4128	ret = process_recorded_refs(sctx, &pending_move);
4129	/* Only applicable to an incremental send. */
4130	ASSERT(pending_move == 0);
4131
4132out:
4133	btrfs_free_path(path);
4134	return ret;
4135}
4136
4137static int send_set_xattr(struct send_ctx *sctx,
4138			  struct fs_path *path,
4139			  const char *name, int name_len,
4140			  const char *data, int data_len)
4141{
4142	int ret = 0;
4143
4144	ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
4145	if (ret < 0)
4146		goto out;
4147
4148	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
4149	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
4150	TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
4151
4152	ret = send_cmd(sctx);
4153
4154tlv_put_failure:
4155out:
4156	return ret;
4157}
4158
4159static int send_remove_xattr(struct send_ctx *sctx,
4160			  struct fs_path *path,
4161			  const char *name, int name_len)
4162{
4163	int ret = 0;
4164
4165	ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
4166	if (ret < 0)
4167		goto out;
4168
4169	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
4170	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
4171
4172	ret = send_cmd(sctx);
4173
4174tlv_put_failure:
4175out:
4176	return ret;
4177}
4178
4179static int __process_new_xattr(int num, struct btrfs_key *di_key,
4180			       const char *name, int name_len,
4181			       const char *data, int data_len,
4182			       u8 type, void *ctx)
4183{
4184	int ret;
4185	struct send_ctx *sctx = ctx;
4186	struct fs_path *p;
4187	posix_acl_xattr_header dummy_acl;
4188
4189	p = fs_path_alloc();
4190	if (!p)
4191		return -ENOMEM;
4192
4193	/*
4194	 * This hack is needed because empty ACLs are stored as zero-byte
4195	 * data in xattrs. The problem with that is that receiving these
4196	 * zero-byte ACLs will fail later. To fix this, we send a dummy ACL
4197	 * list that only contains the version number and no entries.
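 *
 * What is sent instead of the empty value is just the header (a sketch
 * of the code below):
 *
 *	dummy_acl.a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
 *	data = (char *)&dummy_acl;
 *	data_len = sizeof(dummy_acl);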
4198 */ 4199 if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) || 4200 !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) { 4201 if (data_len == 0) { 4202 dummy_acl.a_version = 4203 cpu_to_le32(POSIX_ACL_XATTR_VERSION); 4204 data = (char *)&dummy_acl; 4205 data_len = sizeof(dummy_acl); 4206 } 4207 } 4208 4209 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4210 if (ret < 0) 4211 goto out; 4212 4213 ret = send_set_xattr(sctx, p, name, name_len, data, data_len); 4214 4215out: 4216 fs_path_free(p); 4217 return ret; 4218} 4219 4220static int __process_deleted_xattr(int num, struct btrfs_key *di_key, 4221 const char *name, int name_len, 4222 const char *data, int data_len, 4223 u8 type, void *ctx) 4224{ 4225 int ret; 4226 struct send_ctx *sctx = ctx; 4227 struct fs_path *p; 4228 4229 p = fs_path_alloc(); 4230 if (!p) 4231 return -ENOMEM; 4232 4233 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4234 if (ret < 0) 4235 goto out; 4236 4237 ret = send_remove_xattr(sctx, p, name, name_len); 4238 4239out: 4240 fs_path_free(p); 4241 return ret; 4242} 4243 4244static int process_new_xattr(struct send_ctx *sctx) 4245{ 4246 int ret = 0; 4247 4248 ret = iterate_dir_item(sctx->send_root, sctx->left_path, 4249 sctx->cmp_key, __process_new_xattr, sctx); 4250 4251 return ret; 4252} 4253 4254static int process_deleted_xattr(struct send_ctx *sctx) 4255{ 4256 int ret; 4257 4258 ret = iterate_dir_item(sctx->parent_root, sctx->right_path, 4259 sctx->cmp_key, __process_deleted_xattr, sctx); 4260 4261 return ret; 4262} 4263 4264struct find_xattr_ctx { 4265 const char *name; 4266 int name_len; 4267 int found_idx; 4268 char *found_data; 4269 int found_data_len; 4270}; 4271 4272static int __find_xattr(int num, struct btrfs_key *di_key, 4273 const char *name, int name_len, 4274 const char *data, int data_len, 4275 u8 type, void *vctx) 4276{ 4277 struct find_xattr_ctx *ctx = vctx; 4278 4279 if (name_len == ctx->name_len && 4280 strncmp(name, ctx->name, name_len) == 0) { 4281 ctx->found_idx = num; 4282 ctx->found_data_len = data_len; 4283 ctx->found_data = kmemdup(data, data_len, GFP_NOFS); 4284 if (!ctx->found_data) 4285 return -ENOMEM; 4286 return 1; 4287 } 4288 return 0; 4289} 4290 4291static int find_xattr(struct btrfs_root *root, 4292 struct btrfs_path *path, 4293 struct btrfs_key *key, 4294 const char *name, int name_len, 4295 char **data, int *data_len) 4296{ 4297 int ret; 4298 struct find_xattr_ctx ctx; 4299 4300 ctx.name = name; 4301 ctx.name_len = name_len; 4302 ctx.found_idx = -1; 4303 ctx.found_data = NULL; 4304 ctx.found_data_len = 0; 4305 4306 ret = iterate_dir_item(root, path, key, __find_xattr, &ctx); 4307 if (ret < 0) 4308 return ret; 4309 4310 if (ctx.found_idx == -1) 4311 return -ENOENT; 4312 if (data) { 4313 *data = ctx.found_data; 4314 *data_len = ctx.found_data_len; 4315 } else { 4316 kfree(ctx.found_data); 4317 } 4318 return ctx.found_idx; 4319} 4320 4321 4322static int __process_changed_new_xattr(int num, struct btrfs_key *di_key, 4323 const char *name, int name_len, 4324 const char *data, int data_len, 4325 u8 type, void *ctx) 4326{ 4327 int ret; 4328 struct send_ctx *sctx = ctx; 4329 char *found_data = NULL; 4330 int found_data_len = 0; 4331 4332 ret = find_xattr(sctx->parent_root, sctx->right_path, 4333 sctx->cmp_key, name, name_len, &found_data, 4334 &found_data_len); 4335 if (ret == -ENOENT) { 4336 ret = __process_new_xattr(num, di_key, name, name_len, data, 4337 data_len, type, ctx); 4338 } else if (ret >= 0) { 4339 if (data_len != found_data_len 
|| 4340		    memcmp(data, found_data, data_len)) {
4341			ret = __process_new_xattr(num, di_key, name, name_len,
4342					data, data_len, type, ctx);
4343		} else {
4344			ret = 0;
4345		}
4346	}
4347
4348	kfree(found_data);
4349	return ret;
4350}
4351
4352static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
4353					   const char *name, int name_len,
4354					   const char *data, int data_len,
4355					   u8 type, void *ctx)
4356{
4357	int ret;
4358	struct send_ctx *sctx = ctx;
4359
4360	ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
4361			 name, name_len, NULL, NULL);
4362	if (ret == -ENOENT)
4363		ret = __process_deleted_xattr(num, di_key, name, name_len, data,
4364				data_len, type, ctx);
4365	else if (ret >= 0)
4366		ret = 0;
4367
4368	return ret;
4369}
4370
4371static int process_changed_xattr(struct send_ctx *sctx)
4372{
4373	int ret = 0;
4374
4375	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4376			sctx->cmp_key, __process_changed_new_xattr, sctx);
4377	if (ret < 0)
4378		goto out;
4379	ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4380			sctx->cmp_key, __process_changed_deleted_xattr, sctx);
4381
4382out:
4383	return ret;
4384}
4385
4386static int process_all_new_xattrs(struct send_ctx *sctx)
4387{
4388	int ret;
4389	struct btrfs_root *root;
4390	struct btrfs_path *path;
4391	struct btrfs_key key;
4392	struct btrfs_key found_key;
4393	struct extent_buffer *eb;
4394	int slot;
4395
4396	path = alloc_path_for_send();
4397	if (!path)
4398		return -ENOMEM;
4399
4400	root = sctx->send_root;
4401
4402	key.objectid = sctx->cmp_key->objectid;
4403	key.type = BTRFS_XATTR_ITEM_KEY;
4404	key.offset = 0;
4405	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4406	if (ret < 0)
4407		goto out;
4408
4409	while (1) {
4410		eb = path->nodes[0];
4411		slot = path->slots[0];
4412		if (slot >= btrfs_header_nritems(eb)) {
4413			ret = btrfs_next_leaf(root, path);
4414			if (ret < 0) {
4415				goto out;
4416			} else if (ret > 0) {
4417				ret = 0;
4418				break;
4419			}
4420			continue;
4421		}
4422
4423		btrfs_item_key_to_cpu(eb, &found_key, slot);
4424		if (found_key.objectid != key.objectid ||
4425		    found_key.type != key.type) {
4426			ret = 0;
4427			goto out;
4428		}
4429
4430		ret = iterate_dir_item(root, path, &found_key,
4431				__process_new_xattr, sctx);
4432		if (ret < 0)
4433			goto out;
4434
4435		path->slots[0]++;
4436	}
4437
4438out:
4439	btrfs_free_path(path);
4440	return ret;
4441}
4442
4443static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
4444{
4445	struct btrfs_root *root = sctx->send_root;
4446	struct btrfs_fs_info *fs_info = root->fs_info;
4447	struct inode *inode;
4448	struct page *page;
4449	char *addr;
4450	struct btrfs_key key;
4451	pgoff_t index = offset >> PAGE_CACHE_SHIFT;
4452	pgoff_t last_index;
4453	unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
4454	ssize_t ret = 0;
4455
4456	key.objectid = sctx->cur_ino;
4457	key.type = BTRFS_INODE_ITEM_KEY;
4458	key.offset = 0;
4459
4460	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
4461	if (IS_ERR(inode))
4462		return PTR_ERR(inode);
4463
4464	if (offset + len > i_size_read(inode)) {
4465		if (offset > i_size_read(inode))
4466			len = 0;
4467		else
4468			len = i_size_read(inode) - offset;
4469	}
4470	if (len == 0)
4471		goto out;
4472
4473	last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
4474
4475	/* initial readahead */
4476	memset(&sctx->ra, 0, sizeof(struct file_ra_state));
4477	file_ra_state_init(&sctx->ra, inode->i_mapping);
4478	btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index,
4479		       last_index - index + 1);
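	/*
	 * Copy loop (summary comment added for clarity): walk every page
	 * backing [offset, offset + len), read pages that are not uptodate
	 * yet, and append the requested byte range of each page to
	 * sctx->read_buf.
	 */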
4480 4481 while (index <= last_index) { 4482 unsigned cur_len = min_t(unsigned, len, 4483 PAGE_CACHE_SIZE - pg_offset); 4484 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 4485 if (!page) { 4486 ret = -ENOMEM; 4487 break; 4488 } 4489 4490 if (!PageUptodate(page)) { 4491 btrfs_readpage(NULL, page); 4492 lock_page(page); 4493 if (!PageUptodate(page)) { 4494 unlock_page(page); 4495 page_cache_release(page); 4496 ret = -EIO; 4497 break; 4498 } 4499 } 4500 4501 addr = kmap(page); 4502 memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len); 4503 kunmap(page); 4504 unlock_page(page); 4505 page_cache_release(page); 4506 index++; 4507 pg_offset = 0; 4508 len -= cur_len; 4509 ret += cur_len; 4510 } 4511out: 4512 iput(inode); 4513 return ret; 4514} 4515 4516/* 4517 * Read some bytes from the current inode/file and send a write command to 4518 * user space. 4519 */ 4520static int send_write(struct send_ctx *sctx, u64 offset, u32 len) 4521{ 4522 int ret = 0; 4523 struct fs_path *p; 4524 ssize_t num_read = 0; 4525 4526 p = fs_path_alloc(); 4527 if (!p) 4528 return -ENOMEM; 4529 4530verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); 4531 4532 num_read = fill_read_buf(sctx, offset, len); 4533 if (num_read <= 0) { 4534 if (num_read < 0) 4535 ret = num_read; 4536 goto out; 4537 } 4538 4539 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4540 if (ret < 0) 4541 goto out; 4542 4543 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4544 if (ret < 0) 4545 goto out; 4546 4547 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4548 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4549 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read); 4550 4551 ret = send_cmd(sctx); 4552 4553tlv_put_failure: 4554out: 4555 fs_path_free(p); 4556 if (ret < 0) 4557 return ret; 4558 return num_read; 4559} 4560 4561/* 4562 * Send a clone command to user space. 4563 */ 4564static int send_clone(struct send_ctx *sctx, 4565 u64 offset, u32 len, 4566 struct clone_root *clone_root) 4567{ 4568 int ret = 0; 4569 struct fs_path *p; 4570 u64 gen; 4571 4572verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " 4573 "clone_inode=%llu, clone_offset=%llu\n", offset, len, 4574 clone_root->root->objectid, clone_root->ino, 4575 clone_root->offset); 4576 4577 p = fs_path_alloc(); 4578 if (!p) 4579 return -ENOMEM; 4580 4581 ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE); 4582 if (ret < 0) 4583 goto out; 4584 4585 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4586 if (ret < 0) 4587 goto out; 4588 4589 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4590 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); 4591 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4592 4593 if (clone_root->root == sctx->send_root) { 4594 ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, 4595 &gen, NULL, NULL, NULL, NULL); 4596 if (ret < 0) 4597 goto out; 4598 ret = get_cur_path(sctx, clone_root->ino, gen, p); 4599 } else { 4600 ret = get_inode_path(clone_root->root, clone_root->ino, p); 4601 } 4602 if (ret < 0) 4603 goto out; 4604 4605 /* 4606 * If the parent we're using has a received_uuid set then use that as 4607 * our clone source as that is what we will look for when doing a 4608 * receive. 4609 * 4610 * This covers the case that we create a snapshot off of a received 4611 * subvolume and then use that as the parent and try to receive on a 4612 * different host. 
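	 *
	 * As a hypothetical illustration (host names and paths made up):
	 *
	 *   host A: btrfs send /mnt/sub            (received on hosts B and C)
	 *   host B: btrfs subvolume snapshot -r /mnt/sub /mnt/snap
	 *   host B: btrfs send -p /mnt/sub /mnt/snap   (received on host C)
	 *
	 * On host B the parent "sub" is itself a received subvolume, so host C
	 * knows that subvolume by the uuid recorded in received_uuid (the uuid
	 * of the original on host A), not by the uuid B assigned locally.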
4613 */ 4614 if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid)) 4615 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 4616 clone_root->root->root_item.received_uuid); 4617 else 4618 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 4619 clone_root->root->root_item.uuid); 4620 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 4621 le64_to_cpu(clone_root->root->root_item.ctransid)); 4622 TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); 4623 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, 4624 clone_root->offset); 4625 4626 ret = send_cmd(sctx); 4627 4628tlv_put_failure: 4629out: 4630 fs_path_free(p); 4631 return ret; 4632} 4633 4634/* 4635 * Send an update extent command to user space. 4636 */ 4637static int send_update_extent(struct send_ctx *sctx, 4638 u64 offset, u32 len) 4639{ 4640 int ret = 0; 4641 struct fs_path *p; 4642 4643 p = fs_path_alloc(); 4644 if (!p) 4645 return -ENOMEM; 4646 4647 ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT); 4648 if (ret < 0) 4649 goto out; 4650 4651 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4652 if (ret < 0) 4653 goto out; 4654 4655 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4656 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4657 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); 4658 4659 ret = send_cmd(sctx); 4660 4661tlv_put_failure: 4662out: 4663 fs_path_free(p); 4664 return ret; 4665} 4666 4667static int send_hole(struct send_ctx *sctx, u64 end) 4668{ 4669 struct fs_path *p = NULL; 4670 u64 offset = sctx->cur_inode_last_extent; 4671 u64 len; 4672 int ret = 0; 4673 4674 p = fs_path_alloc(); 4675 if (!p) 4676 return -ENOMEM; 4677 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4678 if (ret < 0) 4679 goto tlv_put_failure; 4680 memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); 4681 while (offset < end) { 4682 len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); 4683 4684 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4685 if (ret < 0) 4686 break; 4687 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4688 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4689 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); 4690 ret = send_cmd(sctx); 4691 if (ret < 0) 4692 break; 4693 offset += len; 4694 } 4695tlv_put_failure: 4696 fs_path_free(p); 4697 return ret; 4698} 4699 4700static int send_extent_data(struct send_ctx *sctx, 4701 const u64 offset, 4702 const u64 len) 4703{ 4704 u64 sent = 0; 4705 4706 if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) 4707 return send_update_extent(sctx, offset, len); 4708 4709 while (sent < len) { 4710 u64 size = len - sent; 4711 int ret; 4712 4713 if (size > BTRFS_SEND_READ_SIZE) 4714 size = BTRFS_SEND_READ_SIZE; 4715 ret = send_write(sctx, offset + sent, size); 4716 if (ret < 0) 4717 return ret; 4718 if (!ret) 4719 break; 4720 sent += ret; 4721 } 4722 return 0; 4723} 4724 4725static int clone_range(struct send_ctx *sctx, 4726 struct clone_root *clone_root, 4727 const u64 disk_byte, 4728 u64 data_offset, 4729 u64 offset, 4730 u64 len) 4731{ 4732 struct btrfs_path *path; 4733 struct btrfs_key key; 4734 int ret; 4735 4736 path = alloc_path_for_send(); 4737 if (!path) 4738 return -ENOMEM; 4739 4740 /* 4741 * We can't send a clone operation for the entire range if we find 4742 * extent items in the respective range in the source file that 4743 * refer to different extents or if we find holes. 4744 * So check for that and do a mix of clone and regular write/copy 4745 * operations if needed. 
4746 * 4747 * Example: 4748 * 4749 * mkfs.btrfs -f /dev/sda 4750 * mount /dev/sda /mnt 4751 * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo 4752 * cp --reflink=always /mnt/foo /mnt/bar 4753 * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo 4754 * btrfs subvolume snapshot -r /mnt /mnt/snap 4755 * 4756 * If when we send the snapshot and we are processing file bar (which 4757 * has a higher inode number than foo) we blindly send a clone operation 4758 * for the [0, 100K[ range from foo to bar, the receiver ends up getting 4759 * a file bar that matches the content of file foo - iow, doesn't match 4760 * the content from bar in the original filesystem. 4761 */ 4762 key.objectid = clone_root->ino; 4763 key.type = BTRFS_EXTENT_DATA_KEY; 4764 key.offset = clone_root->offset; 4765 ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0); 4766 if (ret < 0) 4767 goto out; 4768 if (ret > 0 && path->slots[0] > 0) { 4769 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1); 4770 if (key.objectid == clone_root->ino && 4771 key.type == BTRFS_EXTENT_DATA_KEY) 4772 path->slots[0]--; 4773 } 4774 4775 while (true) { 4776 struct extent_buffer *leaf = path->nodes[0]; 4777 int slot = path->slots[0]; 4778 struct btrfs_file_extent_item *ei; 4779 u8 type; 4780 u64 ext_len; 4781 u64 clone_len; 4782 4783 if (slot >= btrfs_header_nritems(leaf)) { 4784 ret = btrfs_next_leaf(clone_root->root, path); 4785 if (ret < 0) 4786 goto out; 4787 else if (ret > 0) 4788 break; 4789 continue; 4790 } 4791 4792 btrfs_item_key_to_cpu(leaf, &key, slot); 4793 4794 /* 4795 * We might have an implicit trailing hole (NO_HOLES feature 4796 * enabled). We deal with it after leaving this loop. 4797 */ 4798 if (key.objectid != clone_root->ino || 4799 key.type != BTRFS_EXTENT_DATA_KEY) 4800 break; 4801 4802 ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 4803 type = btrfs_file_extent_type(leaf, ei); 4804 if (type == BTRFS_FILE_EXTENT_INLINE) { 4805 ext_len = btrfs_file_extent_inline_len(leaf, slot, ei); 4806 ext_len = PAGE_CACHE_ALIGN(ext_len); 4807 } else { 4808 ext_len = btrfs_file_extent_num_bytes(leaf, ei); 4809 } 4810 4811 if (key.offset + ext_len <= clone_root->offset) 4812 goto next; 4813 4814 if (key.offset > clone_root->offset) { 4815 /* Implicit hole, NO_HOLES feature enabled. 
*/ 4816 u64 hole_len = key.offset - clone_root->offset; 4817 4818 if (hole_len > len) 4819 hole_len = len; 4820 ret = send_extent_data(sctx, offset, hole_len); 4821 if (ret < 0) 4822 goto out; 4823 4824 len -= hole_len; 4825 if (len == 0) 4826 break; 4827 offset += hole_len; 4828 clone_root->offset += hole_len; 4829 data_offset += hole_len; 4830 } 4831 4832 if (key.offset >= clone_root->offset + len) 4833 break; 4834 4835 clone_len = min_t(u64, ext_len, len); 4836 4837 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte && 4838 btrfs_file_extent_offset(leaf, ei) == data_offset) 4839 ret = send_clone(sctx, offset, clone_len, clone_root); 4840 else 4841 ret = send_extent_data(sctx, offset, clone_len); 4842 4843 if (ret < 0) 4844 goto out; 4845 4846 len -= clone_len; 4847 if (len == 0) 4848 break; 4849 offset += clone_len; 4850 clone_root->offset += clone_len; 4851 data_offset += clone_len; 4852next: 4853 path->slots[0]++; 4854 } 4855 4856 if (len > 0) 4857 ret = send_extent_data(sctx, offset, len); 4858 else 4859 ret = 0; 4860out: 4861 btrfs_free_path(path); 4862 return ret; 4863} 4864 4865static int send_write_or_clone(struct send_ctx *sctx, 4866 struct btrfs_path *path, 4867 struct btrfs_key *key, 4868 struct clone_root *clone_root) 4869{ 4870 int ret = 0; 4871 struct btrfs_file_extent_item *ei; 4872 u64 offset = key->offset; 4873 u64 len; 4874 u8 type; 4875 u64 bs = sctx->send_root->fs_info->sb->s_blocksize; 4876 4877 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 4878 struct btrfs_file_extent_item); 4879 type = btrfs_file_extent_type(path->nodes[0], ei); 4880 if (type == BTRFS_FILE_EXTENT_INLINE) { 4881 len = btrfs_file_extent_inline_len(path->nodes[0], 4882 path->slots[0], ei); 4883 /* 4884 * it is possible the inline item won't cover the whole page, 4885 * but there may be items after this page. 
Make sure to send
		 * the whole thing.
		 */
		len = PAGE_CACHE_ALIGN(len);
	} else {
		len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
	}

	if (offset + len > sctx->cur_inode_size)
		len = sctx->cur_inode_size - offset;
	if (len == 0) {
		ret = 0;
		goto out;
	}

	if (clone_root && IS_ALIGNED(offset + len, bs)) {
		u64 disk_byte;
		u64 data_offset;

		disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
		data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
		ret = clone_range(sctx, clone_root, disk_byte, data_offset,
				  offset, len);
	} else {
		ret = send_extent_data(sctx, offset, len);
	}
out:
	return ret;
}

static int is_extent_unchanged(struct send_ctx *sctx,
			       struct btrfs_path *left_path,
			       struct btrfs_key *ekey)
{
	int ret = 0;
	struct btrfs_key key;
	struct btrfs_path *path = NULL;
	struct extent_buffer *eb;
	int slot;
	struct btrfs_key found_key;
	struct btrfs_file_extent_item *ei;
	u64 left_disknr;
	u64 right_disknr;
	u64 left_offset;
	u64 right_offset;
	u64 left_offset_fixed;
	u64 left_len;
	u64 right_len;
	u64 left_gen;
	u64 right_gen;
	u8 left_type;
	u8 right_type;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	eb = left_path->nodes[0];
	slot = left_path->slots[0];
	ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
	left_type = btrfs_file_extent_type(eb, ei);

	if (left_type != BTRFS_FILE_EXTENT_REG) {
		ret = 0;
		goto out;
	}
	left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
	left_len = btrfs_file_extent_num_bytes(eb, ei);
	left_offset = btrfs_file_extent_offset(eb, ei);
	left_gen = btrfs_file_extent_generation(eb, ei);

	/*
	 * The following comments refer to these graphics. L is the left
	 * extent which we are checking at the moment. 1-8 are the right
	 * extents that we iterate.
	 *
	 * |-----L-----|
	 * |-1-|-2a-|-3-|-4-|-5-|-6-|
	 *
	 * |-----L-----|
	 * |--1--|-2b-|...(same as above)
	 *
	 * Alternative situation. Happens on files where extents got split.
	 * |-----L-----|
	 * |-----------7-----------|-6-|
	 *
	 * Alternative situation. Happens on files which got larger.
	 * |-----L-----|
	 * |-8-|
	 * Nothing follows after 8.
	 */

	key.objectid = ekey->objectid;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = ekey->offset;
	ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Handle the special case where the right side has no extents at all.
	 */
	eb = path->nodes[0];
	slot = path->slots[0];
	btrfs_item_key_to_cpu(eb, &found_key, slot);
	if (found_key.objectid != key.objectid ||
	    found_key.type != key.type) {
		/* If we're a hole then just pretend nothing changed */
		ret = (left_disknr) ? 0 : 1;
		goto out;
	}

	/*
	 * We're now on 2a, 2b or 7.
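	 * I.e. the search above left us at the last right extent whose key
	 * offset is smaller than or equal to ekey->offset. The case where
	 * that extent ends before L even starts (extent 8) is detected
	 * inside the loop below.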
5003 */ 5004 key = found_key; 5005 while (key.offset < ekey->offset + left_len) { 5006 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 5007 right_type = btrfs_file_extent_type(eb, ei); 5008 if (right_type != BTRFS_FILE_EXTENT_REG) { 5009 ret = 0; 5010 goto out; 5011 } 5012 5013 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 5014 right_len = btrfs_file_extent_num_bytes(eb, ei); 5015 right_offset = btrfs_file_extent_offset(eb, ei); 5016 right_gen = btrfs_file_extent_generation(eb, ei); 5017 5018 /* 5019 * Are we at extent 8? If yes, we know the extent is changed. 5020 * This may only happen on the first iteration. 5021 */ 5022 if (found_key.offset + right_len <= ekey->offset) { 5023 /* If we're a hole just pretend nothing changed */ 5024 ret = (left_disknr) ? 0 : 1; 5025 goto out; 5026 } 5027 5028 left_offset_fixed = left_offset; 5029 if (key.offset < ekey->offset) { 5030 /* Fix the right offset for 2a and 7. */ 5031 right_offset += ekey->offset - key.offset; 5032 } else { 5033 /* Fix the left offset for all behind 2a and 2b */ 5034 left_offset_fixed += key.offset - ekey->offset; 5035 } 5036 5037 /* 5038 * Check if we have the same extent. 5039 */ 5040 if (left_disknr != right_disknr || 5041 left_offset_fixed != right_offset || 5042 left_gen != right_gen) { 5043 ret = 0; 5044 goto out; 5045 } 5046 5047 /* 5048 * Go to the next extent. 5049 */ 5050 ret = btrfs_next_item(sctx->parent_root, path); 5051 if (ret < 0) 5052 goto out; 5053 if (!ret) { 5054 eb = path->nodes[0]; 5055 slot = path->slots[0]; 5056 btrfs_item_key_to_cpu(eb, &found_key, slot); 5057 } 5058 if (ret || found_key.objectid != key.objectid || 5059 found_key.type != key.type) { 5060 key.offset += right_len; 5061 break; 5062 } 5063 if (found_key.offset != key.offset + right_len) { 5064 ret = 0; 5065 goto out; 5066 } 5067 key = found_key; 5068 } 5069 5070 /* 5071 * We're now behind the left extent (treat as unchanged) or at the end 5072 * of the right side (treat as changed). 
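	 * "Behind the left extent" means every right extent overlapping L
	 * matched and key.offset advanced to ekey->offset + left_len or
	 * beyond. Running off the right side before all of L was covered
	 * means part of L has no identical counterpart in the parent root.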
5073 */ 5074 if (key.offset >= ekey->offset + left_len) 5075 ret = 1; 5076 else 5077 ret = 0; 5078 5079 5080out: 5081 btrfs_free_path(path); 5082 return ret; 5083} 5084 5085static int get_last_extent(struct send_ctx *sctx, u64 offset) 5086{ 5087 struct btrfs_path *path; 5088 struct btrfs_root *root = sctx->send_root; 5089 struct btrfs_file_extent_item *fi; 5090 struct btrfs_key key; 5091 u64 extent_end; 5092 u8 type; 5093 int ret; 5094 5095 path = alloc_path_for_send(); 5096 if (!path) 5097 return -ENOMEM; 5098 5099 sctx->cur_inode_last_extent = 0; 5100 5101 key.objectid = sctx->cur_ino; 5102 key.type = BTRFS_EXTENT_DATA_KEY; 5103 key.offset = offset; 5104 ret = btrfs_search_slot_for_read(root, &key, path, 0, 1); 5105 if (ret < 0) 5106 goto out; 5107 ret = 0; 5108 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 5109 if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY) 5110 goto out; 5111 5112 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 5113 struct btrfs_file_extent_item); 5114 type = btrfs_file_extent_type(path->nodes[0], fi); 5115 if (type == BTRFS_FILE_EXTENT_INLINE) { 5116 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 5117 path->slots[0], fi); 5118 extent_end = ALIGN(key.offset + size, 5119 sctx->send_root->sectorsize); 5120 } else { 5121 extent_end = key.offset + 5122 btrfs_file_extent_num_bytes(path->nodes[0], fi); 5123 } 5124 sctx->cur_inode_last_extent = extent_end; 5125out: 5126 btrfs_free_path(path); 5127 return ret; 5128} 5129 5130static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, 5131 struct btrfs_key *key) 5132{ 5133 struct btrfs_file_extent_item *fi; 5134 u64 extent_end; 5135 u8 type; 5136 int ret = 0; 5137 5138 if (sctx->cur_ino != key->objectid || !need_send_hole(sctx)) 5139 return 0; 5140 5141 if (sctx->cur_inode_last_extent == (u64)-1) { 5142 ret = get_last_extent(sctx, key->offset - 1); 5143 if (ret) 5144 return ret; 5145 } 5146 5147 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 5148 struct btrfs_file_extent_item); 5149 type = btrfs_file_extent_type(path->nodes[0], fi); 5150 if (type == BTRFS_FILE_EXTENT_INLINE) { 5151 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 5152 path->slots[0], fi); 5153 extent_end = ALIGN(key->offset + size, 5154 sctx->send_root->sectorsize); 5155 } else { 5156 extent_end = key->offset + 5157 btrfs_file_extent_num_bytes(path->nodes[0], fi); 5158 } 5159 5160 if (path->slots[0] == 0 && 5161 sctx->cur_inode_last_extent < key->offset) { 5162 /* 5163 * We might have skipped entire leafs that contained only 5164 * file extent items for our current inode. These leafs have 5165 * a generation number smaller (older) than the one in the 5166 * current leaf and the leaf our last extent came from, and 5167 * are located between these 2 leafs. 
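		 * The gap between cur_inode_last_extent and the current key's
		 * offset is then not a real hole, it is covered by the extent
		 * items in those skipped leafs, so re-read the last extent
		 * from the send root before deciding whether hole writes are
		 * needed.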
5168 */ 5169 ret = get_last_extent(sctx, key->offset - 1); 5170 if (ret) 5171 return ret; 5172 } 5173 5174 if (sctx->cur_inode_last_extent < key->offset) 5175 ret = send_hole(sctx, key->offset); 5176 sctx->cur_inode_last_extent = extent_end; 5177 return ret; 5178} 5179 5180static int process_extent(struct send_ctx *sctx, 5181 struct btrfs_path *path, 5182 struct btrfs_key *key) 5183{ 5184 struct clone_root *found_clone = NULL; 5185 int ret = 0; 5186 5187 if (S_ISLNK(sctx->cur_inode_mode)) 5188 return 0; 5189 5190 if (sctx->parent_root && !sctx->cur_inode_new) { 5191 ret = is_extent_unchanged(sctx, path, key); 5192 if (ret < 0) 5193 goto out; 5194 if (ret) { 5195 ret = 0; 5196 goto out_hole; 5197 } 5198 } else { 5199 struct btrfs_file_extent_item *ei; 5200 u8 type; 5201 5202 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 5203 struct btrfs_file_extent_item); 5204 type = btrfs_file_extent_type(path->nodes[0], ei); 5205 if (type == BTRFS_FILE_EXTENT_PREALLOC || 5206 type == BTRFS_FILE_EXTENT_REG) { 5207 /* 5208 * The send spec does not have a prealloc command yet, 5209 * so just leave a hole for prealloc'ed extents until 5210 * we have enough commands queued up to justify rev'ing 5211 * the send spec. 5212 */ 5213 if (type == BTRFS_FILE_EXTENT_PREALLOC) { 5214 ret = 0; 5215 goto out; 5216 } 5217 5218 /* Have a hole, just skip it. */ 5219 if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) { 5220 ret = 0; 5221 goto out; 5222 } 5223 } 5224 } 5225 5226 ret = find_extent_clone(sctx, path, key->objectid, key->offset, 5227 sctx->cur_inode_size, &found_clone); 5228 if (ret != -ENOENT && ret < 0) 5229 goto out; 5230 5231 ret = send_write_or_clone(sctx, path, key, found_clone); 5232 if (ret) 5233 goto out; 5234out_hole: 5235 ret = maybe_send_hole(sctx, path, key); 5236out: 5237 return ret; 5238} 5239 5240static int process_all_extents(struct send_ctx *sctx) 5241{ 5242 int ret; 5243 struct btrfs_root *root; 5244 struct btrfs_path *path; 5245 struct btrfs_key key; 5246 struct btrfs_key found_key; 5247 struct extent_buffer *eb; 5248 int slot; 5249 5250 root = sctx->send_root; 5251 path = alloc_path_for_send(); 5252 if (!path) 5253 return -ENOMEM; 5254 5255 key.objectid = sctx->cmp_key->objectid; 5256 key.type = BTRFS_EXTENT_DATA_KEY; 5257 key.offset = 0; 5258 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5259 if (ret < 0) 5260 goto out; 5261 5262 while (1) { 5263 eb = path->nodes[0]; 5264 slot = path->slots[0]; 5265 5266 if (slot >= btrfs_header_nritems(eb)) { 5267 ret = btrfs_next_leaf(root, path); 5268 if (ret < 0) { 5269 goto out; 5270 } else if (ret > 0) { 5271 ret = 0; 5272 break; 5273 } 5274 continue; 5275 } 5276 5277 btrfs_item_key_to_cpu(eb, &found_key, slot); 5278 5279 if (found_key.objectid != key.objectid || 5280 found_key.type != key.type) { 5281 ret = 0; 5282 goto out; 5283 } 5284 5285 ret = process_extent(sctx, path, &found_key); 5286 if (ret < 0) 5287 goto out; 5288 5289 path->slots[0]++; 5290 } 5291 5292out: 5293 btrfs_free_path(path); 5294 return ret; 5295} 5296 5297static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end, 5298 int *pending_move, 5299 int *refs_processed) 5300{ 5301 int ret = 0; 5302 5303 if (sctx->cur_ino == 0) 5304 goto out; 5305 if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid && 5306 sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY) 5307 goto out; 5308 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) 5309 goto out; 5310 5311 ret = process_recorded_refs(sctx, pending_move); 5312 if (ret < 0) 5313 goto 
out; 5314 5315 *refs_processed = 1; 5316out: 5317 return ret; 5318} 5319 5320static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) 5321{ 5322 int ret = 0; 5323 u64 left_mode; 5324 u64 left_uid; 5325 u64 left_gid; 5326 u64 right_mode; 5327 u64 right_uid; 5328 u64 right_gid; 5329 int need_chmod = 0; 5330 int need_chown = 0; 5331 int pending_move = 0; 5332 int refs_processed = 0; 5333 5334 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, 5335 &refs_processed); 5336 if (ret < 0) 5337 goto out; 5338 5339 /* 5340 * We have processed the refs and thus need to advance send_progress. 5341 * Now, calls to get_cur_xxx will take the updated refs of the current 5342 * inode into account. 5343 * 5344 * On the other hand, if our current inode is a directory and couldn't 5345 * be moved/renamed because its parent was renamed/moved too and it has 5346 * a higher inode number, we can only move/rename our current inode 5347 * after we moved/renamed its parent. Therefore in this case operate on 5348 * the old path (pre move/rename) of our current inode, and the 5349 * move/rename will be performed later. 5350 */ 5351 if (refs_processed && !pending_move) 5352 sctx->send_progress = sctx->cur_ino + 1; 5353 5354 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted) 5355 goto out; 5356 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino) 5357 goto out; 5358 5359 ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, 5360 &left_mode, &left_uid, &left_gid, NULL); 5361 if (ret < 0) 5362 goto out; 5363 5364 if (!sctx->parent_root || sctx->cur_inode_new) { 5365 need_chown = 1; 5366 if (!S_ISLNK(sctx->cur_inode_mode)) 5367 need_chmod = 1; 5368 } else { 5369 ret = get_inode_info(sctx->parent_root, sctx->cur_ino, 5370 NULL, NULL, &right_mode, &right_uid, 5371 &right_gid, NULL); 5372 if (ret < 0) 5373 goto out; 5374 5375 if (left_uid != right_uid || left_gid != right_gid) 5376 need_chown = 1; 5377 if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode) 5378 need_chmod = 1; 5379 } 5380 5381 if (S_ISREG(sctx->cur_inode_mode)) { 5382 if (need_send_hole(sctx)) { 5383 if (sctx->cur_inode_last_extent == (u64)-1 || 5384 sctx->cur_inode_last_extent < 5385 sctx->cur_inode_size) { 5386 ret = get_last_extent(sctx, (u64)-1); 5387 if (ret) 5388 goto out; 5389 } 5390 if (sctx->cur_inode_last_extent < 5391 sctx->cur_inode_size) { 5392 ret = send_hole(sctx, sctx->cur_inode_size); 5393 if (ret) 5394 goto out; 5395 } 5396 } 5397 ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5398 sctx->cur_inode_size); 5399 if (ret < 0) 5400 goto out; 5401 } 5402 5403 if (need_chown) { 5404 ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5405 left_uid, left_gid); 5406 if (ret < 0) 5407 goto out; 5408 } 5409 if (need_chmod) { 5410 ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5411 left_mode); 5412 if (ret < 0) 5413 goto out; 5414 } 5415 5416 /* 5417 * If other directory inodes depended on our current directory 5418 * inode's move/rename, now do their move/rename operations. 5419 */ 5420 if (!is_waiting_for_move(sctx, sctx->cur_ino)) { 5421 ret = apply_children_dir_moves(sctx); 5422 if (ret) 5423 goto out; 5424 /* 5425 * Need to send that every time, no matter if it actually 5426 * changed between the two trees as we have done changes to 5427 * the inode before. If our inode is a directory and it's 5428 * waiting to be moved/renamed, we will send its utimes when 5429 * it's moved/renamed, therefore we don't need to do it here. 
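		 * ("That" refers to the utimes update below: commands sent
		 * for this inode, like chown, chmod or writes, touch the
		 * timestamps on the receiving side, so utimes must be sent
		 * last to restore them.)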
5430 */ 5431 sctx->send_progress = sctx->cur_ino + 1; 5432 ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); 5433 if (ret < 0) 5434 goto out; 5435 } 5436 5437out: 5438 return ret; 5439} 5440 5441static int changed_inode(struct send_ctx *sctx, 5442 enum btrfs_compare_tree_result result) 5443{ 5444 int ret = 0; 5445 struct btrfs_key *key = sctx->cmp_key; 5446 struct btrfs_inode_item *left_ii = NULL; 5447 struct btrfs_inode_item *right_ii = NULL; 5448 u64 left_gen = 0; 5449 u64 right_gen = 0; 5450 5451 sctx->cur_ino = key->objectid; 5452 sctx->cur_inode_new_gen = 0; 5453 sctx->cur_inode_last_extent = (u64)-1; 5454 5455 /* 5456 * Set send_progress to current inode. This will tell all get_cur_xxx 5457 * functions that the current inode's refs are not updated yet. Later, 5458 * when process_recorded_refs is finished, it is set to cur_ino + 1. 5459 */ 5460 sctx->send_progress = sctx->cur_ino; 5461 5462 if (result == BTRFS_COMPARE_TREE_NEW || 5463 result == BTRFS_COMPARE_TREE_CHANGED) { 5464 left_ii = btrfs_item_ptr(sctx->left_path->nodes[0], 5465 sctx->left_path->slots[0], 5466 struct btrfs_inode_item); 5467 left_gen = btrfs_inode_generation(sctx->left_path->nodes[0], 5468 left_ii); 5469 } else { 5470 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], 5471 sctx->right_path->slots[0], 5472 struct btrfs_inode_item); 5473 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], 5474 right_ii); 5475 } 5476 if (result == BTRFS_COMPARE_TREE_CHANGED) { 5477 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], 5478 sctx->right_path->slots[0], 5479 struct btrfs_inode_item); 5480 5481 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], 5482 right_ii); 5483 5484 /* 5485 * The cur_ino = root dir case is special here. We can't treat 5486 * the inode as deleted+reused because it would generate a 5487 * stream that tries to delete/mkdir the root dir. 5488 */ 5489 if (left_gen != right_gen && 5490 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) 5491 sctx->cur_inode_new_gen = 1; 5492 } 5493 5494 if (result == BTRFS_COMPARE_TREE_NEW) { 5495 sctx->cur_inode_gen = left_gen; 5496 sctx->cur_inode_new = 1; 5497 sctx->cur_inode_deleted = 0; 5498 sctx->cur_inode_size = btrfs_inode_size( 5499 sctx->left_path->nodes[0], left_ii); 5500 sctx->cur_inode_mode = btrfs_inode_mode( 5501 sctx->left_path->nodes[0], left_ii); 5502 sctx->cur_inode_rdev = btrfs_inode_rdev( 5503 sctx->left_path->nodes[0], left_ii); 5504 if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) 5505 ret = send_create_inode_if_needed(sctx); 5506 } else if (result == BTRFS_COMPARE_TREE_DELETED) { 5507 sctx->cur_inode_gen = right_gen; 5508 sctx->cur_inode_new = 0; 5509 sctx->cur_inode_deleted = 1; 5510 sctx->cur_inode_size = btrfs_inode_size( 5511 sctx->right_path->nodes[0], right_ii); 5512 sctx->cur_inode_mode = btrfs_inode_mode( 5513 sctx->right_path->nodes[0], right_ii); 5514 } else if (result == BTRFS_COMPARE_TREE_CHANGED) { 5515 /* 5516 * We need to do some special handling in case the inode was 5517 * reported as changed with a changed generation number. This 5518 * means that the original inode was deleted and new inode 5519 * reused the same inum. So we have to treat the old inode as 5520 * deleted and the new one as new. 5521 */ 5522 if (sctx->cur_inode_new_gen) { 5523 /* 5524 * First, process the inode as if it was deleted. 
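			 * All references in the parent snapshot are recorded
			 * as deleted; further below the inode is processed
			 * again as if it was new, recording all references in
			 * the send snapshot as new ones.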
5525 */ 5526 sctx->cur_inode_gen = right_gen; 5527 sctx->cur_inode_new = 0; 5528 sctx->cur_inode_deleted = 1; 5529 sctx->cur_inode_size = btrfs_inode_size( 5530 sctx->right_path->nodes[0], right_ii); 5531 sctx->cur_inode_mode = btrfs_inode_mode( 5532 sctx->right_path->nodes[0], right_ii); 5533 ret = process_all_refs(sctx, 5534 BTRFS_COMPARE_TREE_DELETED); 5535 if (ret < 0) 5536 goto out; 5537 5538 /* 5539 * Now process the inode as if it was new. 5540 */ 5541 sctx->cur_inode_gen = left_gen; 5542 sctx->cur_inode_new = 1; 5543 sctx->cur_inode_deleted = 0; 5544 sctx->cur_inode_size = btrfs_inode_size( 5545 sctx->left_path->nodes[0], left_ii); 5546 sctx->cur_inode_mode = btrfs_inode_mode( 5547 sctx->left_path->nodes[0], left_ii); 5548 sctx->cur_inode_rdev = btrfs_inode_rdev( 5549 sctx->left_path->nodes[0], left_ii); 5550 ret = send_create_inode_if_needed(sctx); 5551 if (ret < 0) 5552 goto out; 5553 5554 ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW); 5555 if (ret < 0) 5556 goto out; 5557 /* 5558 * Advance send_progress now as we did not get into 5559 * process_recorded_refs_if_needed in the new_gen case. 5560 */ 5561 sctx->send_progress = sctx->cur_ino + 1; 5562 5563 /* 5564 * Now process all extents and xattrs of the inode as if 5565 * they were all new. 5566 */ 5567 ret = process_all_extents(sctx); 5568 if (ret < 0) 5569 goto out; 5570 ret = process_all_new_xattrs(sctx); 5571 if (ret < 0) 5572 goto out; 5573 } else { 5574 sctx->cur_inode_gen = left_gen; 5575 sctx->cur_inode_new = 0; 5576 sctx->cur_inode_new_gen = 0; 5577 sctx->cur_inode_deleted = 0; 5578 sctx->cur_inode_size = btrfs_inode_size( 5579 sctx->left_path->nodes[0], left_ii); 5580 sctx->cur_inode_mode = btrfs_inode_mode( 5581 sctx->left_path->nodes[0], left_ii); 5582 } 5583 } 5584 5585out: 5586 return ret; 5587} 5588 5589/* 5590 * We have to process new refs before deleted refs, but compare_trees gives us 5591 * the new and deleted refs mixed. To fix this, we record the new/deleted refs 5592 * first and later process them in process_recorded_refs. 5593 * For the cur_inode_new_gen case, we skip recording completely because 5594 * changed_inode did already initiate processing of refs. The reason for this is 5595 * that in this case, compare_tree actually compares the refs of 2 different 5596 * inodes. To fix this, process_all_refs is used in changed_inode to handle all 5597 * refs of the right tree as deleted and all refs of the left tree as new. 5598 */ 5599static int changed_ref(struct send_ctx *sctx, 5600 enum btrfs_compare_tree_result result) 5601{ 5602 int ret = 0; 5603 5604 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5605 5606 if (!sctx->cur_inode_new_gen && 5607 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { 5608 if (result == BTRFS_COMPARE_TREE_NEW) 5609 ret = record_new_ref(sctx); 5610 else if (result == BTRFS_COMPARE_TREE_DELETED) 5611 ret = record_deleted_ref(sctx); 5612 else if (result == BTRFS_COMPARE_TREE_CHANGED) 5613 ret = record_changed_ref(sctx); 5614 } 5615 5616 return ret; 5617} 5618 5619/* 5620 * Process new/deleted/changed xattrs. We skip processing in the 5621 * cur_inode_new_gen case because changed_inode did already initiate processing 5622 * of xattrs. 
The reason is the same as in changed_ref 5623 */ 5624static int changed_xattr(struct send_ctx *sctx, 5625 enum btrfs_compare_tree_result result) 5626{ 5627 int ret = 0; 5628 5629 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5630 5631 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5632 if (result == BTRFS_COMPARE_TREE_NEW) 5633 ret = process_new_xattr(sctx); 5634 else if (result == BTRFS_COMPARE_TREE_DELETED) 5635 ret = process_deleted_xattr(sctx); 5636 else if (result == BTRFS_COMPARE_TREE_CHANGED) 5637 ret = process_changed_xattr(sctx); 5638 } 5639 5640 return ret; 5641} 5642 5643/* 5644 * Process new/deleted/changed extents. We skip processing in the 5645 * cur_inode_new_gen case because changed_inode did already initiate processing 5646 * of extents. The reason is the same as in changed_ref 5647 */ 5648static int changed_extent(struct send_ctx *sctx, 5649 enum btrfs_compare_tree_result result) 5650{ 5651 int ret = 0; 5652 5653 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5654 5655 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5656 if (result != BTRFS_COMPARE_TREE_DELETED) 5657 ret = process_extent(sctx, sctx->left_path, 5658 sctx->cmp_key); 5659 } 5660 5661 return ret; 5662} 5663 5664static int dir_changed(struct send_ctx *sctx, u64 dir) 5665{ 5666 u64 orig_gen, new_gen; 5667 int ret; 5668 5669 ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL, 5670 NULL, NULL); 5671 if (ret) 5672 return ret; 5673 5674 ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL, 5675 NULL, NULL, NULL); 5676 if (ret) 5677 return ret; 5678 5679 return (orig_gen != new_gen) ? 1 : 0; 5680} 5681 5682static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path, 5683 struct btrfs_key *key) 5684{ 5685 struct btrfs_inode_extref *extref; 5686 struct extent_buffer *leaf; 5687 u64 dirid = 0, last_dirid = 0; 5688 unsigned long ptr; 5689 u32 item_size; 5690 u32 cur_offset = 0; 5691 int ref_name_len; 5692 int ret = 0; 5693 5694 /* Easy case, just check this one dirid */ 5695 if (key->type == BTRFS_INODE_REF_KEY) { 5696 dirid = key->offset; 5697 5698 ret = dir_changed(sctx, dirid); 5699 goto out; 5700 } 5701 5702 leaf = path->nodes[0]; 5703 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 5704 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 5705 while (cur_offset < item_size) { 5706 extref = (struct btrfs_inode_extref *)(ptr + 5707 cur_offset); 5708 dirid = btrfs_inode_extref_parent(leaf, extref); 5709 ref_name_len = btrfs_inode_extref_name_len(leaf, extref); 5710 cur_offset += ref_name_len + sizeof(*extref); 5711 if (dirid == last_dirid) 5712 continue; 5713 ret = dir_changed(sctx, dirid); 5714 if (ret) 5715 break; 5716 last_dirid = dirid; 5717 } 5718out: 5719 return ret; 5720} 5721 5722/* 5723 * Updates compare related fields in sctx and simply forwards to the actual 5724 * changed_xxx functions. 
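 *
 * Dispatch by key type:
 *
 *   BTRFS_INODE_ITEM_KEY                         -> changed_inode
 *   BTRFS_INODE_REF_KEY / BTRFS_INODE_EXTREF_KEY -> changed_ref
 *   BTRFS_XATTR_ITEM_KEY                         -> changed_xattr
 *   BTRFS_EXTENT_DATA_KEY                        -> changed_extent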
5725 */ 5726static int changed_cb(struct btrfs_root *left_root, 5727 struct btrfs_root *right_root, 5728 struct btrfs_path *left_path, 5729 struct btrfs_path *right_path, 5730 struct btrfs_key *key, 5731 enum btrfs_compare_tree_result result, 5732 void *ctx) 5733{ 5734 int ret = 0; 5735 struct send_ctx *sctx = ctx; 5736 5737 if (result == BTRFS_COMPARE_TREE_SAME) { 5738 if (key->type == BTRFS_INODE_REF_KEY || 5739 key->type == BTRFS_INODE_EXTREF_KEY) { 5740 ret = compare_refs(sctx, left_path, key); 5741 if (!ret) 5742 return 0; 5743 if (ret < 0) 5744 return ret; 5745 } else if (key->type == BTRFS_EXTENT_DATA_KEY) { 5746 return maybe_send_hole(sctx, left_path, key); 5747 } else { 5748 return 0; 5749 } 5750 result = BTRFS_COMPARE_TREE_CHANGED; 5751 ret = 0; 5752 } 5753 5754 sctx->left_path = left_path; 5755 sctx->right_path = right_path; 5756 sctx->cmp_key = key; 5757 5758 ret = finish_inode_if_needed(sctx, 0); 5759 if (ret < 0) 5760 goto out; 5761 5762 /* Ignore non-FS objects */ 5763 if (key->objectid == BTRFS_FREE_INO_OBJECTID || 5764 key->objectid == BTRFS_FREE_SPACE_OBJECTID) 5765 goto out; 5766 5767 if (key->type == BTRFS_INODE_ITEM_KEY) 5768 ret = changed_inode(sctx, result); 5769 else if (key->type == BTRFS_INODE_REF_KEY || 5770 key->type == BTRFS_INODE_EXTREF_KEY) 5771 ret = changed_ref(sctx, result); 5772 else if (key->type == BTRFS_XATTR_ITEM_KEY) 5773 ret = changed_xattr(sctx, result); 5774 else if (key->type == BTRFS_EXTENT_DATA_KEY) 5775 ret = changed_extent(sctx, result); 5776 5777out: 5778 return ret; 5779} 5780 5781static int full_send_tree(struct send_ctx *sctx) 5782{ 5783 int ret; 5784 struct btrfs_root *send_root = sctx->send_root; 5785 struct btrfs_key key; 5786 struct btrfs_key found_key; 5787 struct btrfs_path *path; 5788 struct extent_buffer *eb; 5789 int slot; 5790 5791 path = alloc_path_for_send(); 5792 if (!path) 5793 return -ENOMEM; 5794 5795 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 5796 key.type = BTRFS_INODE_ITEM_KEY; 5797 key.offset = 0; 5798 5799 ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); 5800 if (ret < 0) 5801 goto out; 5802 if (ret) 5803 goto out_finish; 5804 5805 while (1) { 5806 eb = path->nodes[0]; 5807 slot = path->slots[0]; 5808 btrfs_item_key_to_cpu(eb, &found_key, slot); 5809 5810 ret = changed_cb(send_root, NULL, path, NULL, 5811 &found_key, BTRFS_COMPARE_TREE_NEW, sctx); 5812 if (ret < 0) 5813 goto out; 5814 5815 key.objectid = found_key.objectid; 5816 key.type = found_key.type; 5817 key.offset = found_key.offset + 1; 5818 5819 ret = btrfs_next_item(send_root, path); 5820 if (ret < 0) 5821 goto out; 5822 if (ret) { 5823 ret = 0; 5824 break; 5825 } 5826 } 5827 5828out_finish: 5829 ret = finish_inode_if_needed(sctx, 1); 5830 5831out: 5832 btrfs_free_path(path); 5833 return ret; 5834} 5835 5836static int send_subvol(struct send_ctx *sctx) 5837{ 5838 int ret; 5839 5840 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) { 5841 ret = send_header(sctx); 5842 if (ret < 0) 5843 goto out; 5844 } 5845 5846 ret = send_subvol_begin(sctx); 5847 if (ret < 0) 5848 goto out; 5849 5850 if (sctx->parent_root) { 5851 ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, 5852 changed_cb, sctx); 5853 if (ret < 0) 5854 goto out; 5855 ret = finish_inode_if_needed(sctx, 1); 5856 if (ret < 0) 5857 goto out; 5858 } else { 5859 ret = full_send_tree(sctx); 5860 if (ret < 0) 5861 goto out; 5862 } 5863 5864out: 5865 free_recorded_refs(sctx); 5866 return ret; 5867} 5868 5869/* 5870 * If orphan cleanup did remove any orphans from a root, it means 
the tree
 * was modified and therefore the commit root is not the same as the current
 * root anymore. This is a problem, because send uses the commit root and
 * therefore can see inode items that don't exist in the current root anymore,
 * and for example make calls to btrfs_iget, which will do tree lookups based
 * on the current root and not on the commit root. Those lookups will fail,
 * returning a -ESTALE error, and making send fail with that error. So make
 * sure a send does not see any orphans we have just removed, and that it will
 * see the same inodes regardless of whether a transaction commit happened
 * before it started (meaning that the commit root will be the same as the
 * current root) or not.
 */
static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
{
	int i;
	struct btrfs_trans_handle *trans = NULL;

again:
	if (sctx->parent_root &&
	    sctx->parent_root->node != sctx->parent_root->commit_root)
		goto commit_trans;

	for (i = 0; i < sctx->clone_roots_cnt; i++)
		if (sctx->clone_roots[i].root->node !=
		    sctx->clone_roots[i].root->commit_root)
			goto commit_trans;

	if (trans)
		return btrfs_end_transaction(trans, sctx->send_root);

	return 0;

commit_trans:
	/* Use any root, all fs roots will get their commit roots updated. */
	if (!trans) {
		trans = btrfs_join_transaction(sctx->send_root);
		if (IS_ERR(trans))
			return PTR_ERR(trans);
		goto again;
	}

	return btrfs_commit_transaction(trans, sctx->send_root);
}

static void btrfs_root_dec_send_in_progress(struct btrfs_root *root)
{
	spin_lock(&root->root_item_lock);
	root->send_in_progress--;
	/*
	 * Not much left to do, we don't know why it's unbalanced and
	 * can't blindly reset it to 0.
	 */
	if (root->send_in_progress < 0)
		btrfs_err(root->fs_info,
			  "send_in_progress unbalanced %d root %llu",
			  root->send_in_progress, root->root_key.objectid);
	spin_unlock(&root->root_item_lock);
}

long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
{
	int ret = 0;
	struct btrfs_root *send_root;
	struct btrfs_root *clone_root;
	struct btrfs_fs_info *fs_info;
	struct btrfs_ioctl_send_args *arg = NULL;
	struct btrfs_key key;
	struct send_ctx *sctx = NULL;
	u32 i;
	u64 *clone_sources_tmp = NULL;
	int clone_sources_to_rollback = 0;
	int sort_clone_roots = 0;
	int index;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	send_root = BTRFS_I(file_inode(mnt_file))->root;
	fs_info = send_root->fs_info;

	/*
	 * The subvolume must remain read-only during send, protect against
	 * making it RW. This also protects against deletion.
	 */
	spin_lock(&send_root->root_item_lock);
	send_root->send_in_progress++;
	spin_unlock(&send_root->root_item_lock);

	/*
	 * This is done when we lookup the root, it should already be complete
	 * by the time we get here.
	 */
	WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);

	/*
	 * Userspace tools do the checks and warn the user if it's
	 * not RO.
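	 * The check below is still authoritative, though: a send root that
	 * is not read-only makes the ioctl fail with -EPERM.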
5967 */ 5968 if (!btrfs_root_readonly(send_root)) { 5969 ret = -EPERM; 5970 goto out; 5971 } 5972 5973 arg = memdup_user(arg_, sizeof(*arg)); 5974 if (IS_ERR(arg)) { 5975 ret = PTR_ERR(arg); 5976 arg = NULL; 5977 goto out; 5978 } 5979 5980 if (!access_ok(VERIFY_READ, arg->clone_sources, 5981 sizeof(*arg->clone_sources) * 5982 arg->clone_sources_count)) { 5983 ret = -EFAULT; 5984 goto out; 5985 } 5986 5987 if (arg->flags & ~BTRFS_SEND_FLAG_MASK) { 5988 ret = -EINVAL; 5989 goto out; 5990 } 5991 5992 sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); 5993 if (!sctx) { 5994 ret = -ENOMEM; 5995 goto out; 5996 } 5997 5998 INIT_LIST_HEAD(&sctx->new_refs); 5999 INIT_LIST_HEAD(&sctx->deleted_refs); 6000 INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); 6001 INIT_LIST_HEAD(&sctx->name_cache_list); 6002 6003 sctx->flags = arg->flags; 6004 6005 sctx->send_filp = fget(arg->send_fd); 6006 if (!sctx->send_filp) { 6007 ret = -EBADF; 6008 goto out; 6009 } 6010 6011 sctx->send_root = send_root; 6012 /* 6013 * Unlikely but possible, if the subvolume is marked for deletion but 6014 * is slow to remove the directory entry, send can still be started 6015 */ 6016 if (btrfs_root_dead(sctx->send_root)) { 6017 ret = -EPERM; 6018 goto out; 6019 } 6020 6021 sctx->clone_roots_cnt = arg->clone_sources_count; 6022 6023 sctx->send_max_size = BTRFS_SEND_BUF_SIZE; 6024 sctx->send_buf = vmalloc(sctx->send_max_size); 6025 if (!sctx->send_buf) { 6026 ret = -ENOMEM; 6027 goto out; 6028 } 6029 6030 sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE); 6031 if (!sctx->read_buf) { 6032 ret = -ENOMEM; 6033 goto out; 6034 } 6035 6036 sctx->pending_dir_moves = RB_ROOT; 6037 sctx->waiting_dir_moves = RB_ROOT; 6038 sctx->orphan_dirs = RB_ROOT; 6039 6040 sctx->clone_roots = vzalloc(sizeof(struct clone_root) * 6041 (arg->clone_sources_count + 1)); 6042 if (!sctx->clone_roots) { 6043 ret = -ENOMEM; 6044 goto out; 6045 } 6046 6047 if (arg->clone_sources_count) { 6048 clone_sources_tmp = vmalloc(arg->clone_sources_count * 6049 sizeof(*arg->clone_sources)); 6050 if (!clone_sources_tmp) { 6051 ret = -ENOMEM; 6052 goto out; 6053 } 6054 6055 ret = copy_from_user(clone_sources_tmp, arg->clone_sources, 6056 arg->clone_sources_count * 6057 sizeof(*arg->clone_sources)); 6058 if (ret) { 6059 ret = -EFAULT; 6060 goto out; 6061 } 6062 6063 for (i = 0; i < arg->clone_sources_count; i++) { 6064 key.objectid = clone_sources_tmp[i]; 6065 key.type = BTRFS_ROOT_ITEM_KEY; 6066 key.offset = (u64)-1; 6067 6068 index = srcu_read_lock(&fs_info->subvol_srcu); 6069 6070 clone_root = btrfs_read_fs_root_no_name(fs_info, &key); 6071 if (IS_ERR(clone_root)) { 6072 srcu_read_unlock(&fs_info->subvol_srcu, index); 6073 ret = PTR_ERR(clone_root); 6074 goto out; 6075 } 6076 spin_lock(&clone_root->root_item_lock); 6077 if (!btrfs_root_readonly(clone_root) || 6078 btrfs_root_dead(clone_root)) { 6079 spin_unlock(&clone_root->root_item_lock); 6080 srcu_read_unlock(&fs_info->subvol_srcu, index); 6081 ret = -EPERM; 6082 goto out; 6083 } 6084 clone_root->send_in_progress++; 6085 spin_unlock(&clone_root->root_item_lock); 6086 srcu_read_unlock(&fs_info->subvol_srcu, index); 6087 6088 sctx->clone_roots[i].root = clone_root; 6089 clone_sources_to_rollback = i + 1; 6090 } 6091 vfree(clone_sources_tmp); 6092 clone_sources_tmp = NULL; 6093 } 6094 6095 if (arg->parent_root) { 6096 key.objectid = arg->parent_root; 6097 key.type = BTRFS_ROOT_ITEM_KEY; 6098 key.offset = (u64)-1; 6099 6100 index = srcu_read_lock(&fs_info->subvol_srcu); 6101 6102 sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, 
&key); 6103 if (IS_ERR(sctx->parent_root)) { 6104 srcu_read_unlock(&fs_info->subvol_srcu, index); 6105 ret = PTR_ERR(sctx->parent_root); 6106 goto out; 6107 } 6108 6109 spin_lock(&sctx->parent_root->root_item_lock); 6110 sctx->parent_root->send_in_progress++; 6111 if (!btrfs_root_readonly(sctx->parent_root) || 6112 btrfs_root_dead(sctx->parent_root)) { 6113 spin_unlock(&sctx->parent_root->root_item_lock); 6114 srcu_read_unlock(&fs_info->subvol_srcu, index); 6115 ret = -EPERM; 6116 goto out; 6117 } 6118 spin_unlock(&sctx->parent_root->root_item_lock); 6119 6120 srcu_read_unlock(&fs_info->subvol_srcu, index); 6121 } 6122 6123 /* 6124 * Clones from send_root are allowed, but only if the clone source 6125 * is behind the current send position. This is checked while searching 6126 * for possible clone sources. 6127 */ 6128 sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root; 6129 6130 /* We do a bsearch later */ 6131 sort(sctx->clone_roots, sctx->clone_roots_cnt, 6132 sizeof(*sctx->clone_roots), __clone_root_cmp_sort, 6133 NULL); 6134 sort_clone_roots = 1; 6135 6136 ret = ensure_commit_roots_uptodate(sctx); 6137 if (ret) 6138 goto out; 6139 6140 current->journal_info = BTRFS_SEND_TRANS_STUB; 6141 ret = send_subvol(sctx); 6142 current->journal_info = NULL; 6143 if (ret < 0) 6144 goto out; 6145 6146 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) { 6147 ret = begin_cmd(sctx, BTRFS_SEND_C_END); 6148 if (ret < 0) 6149 goto out; 6150 ret = send_cmd(sctx); 6151 if (ret < 0) 6152 goto out; 6153 } 6154 6155out: 6156 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)); 6157 while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) { 6158 struct rb_node *n; 6159 struct pending_dir_move *pm; 6160 6161 n = rb_first(&sctx->pending_dir_moves); 6162 pm = rb_entry(n, struct pending_dir_move, node); 6163 while (!list_empty(&pm->list)) { 6164 struct pending_dir_move *pm2; 6165 6166 pm2 = list_first_entry(&pm->list, 6167 struct pending_dir_move, list); 6168 free_pending_move(sctx, pm2); 6169 } 6170 free_pending_move(sctx, pm); 6171 } 6172 6173 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)); 6174 while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) { 6175 struct rb_node *n; 6176 struct waiting_dir_move *dm; 6177 6178 n = rb_first(&sctx->waiting_dir_moves); 6179 dm = rb_entry(n, struct waiting_dir_move, node); 6180 rb_erase(&dm->node, &sctx->waiting_dir_moves); 6181 kfree(dm); 6182 } 6183 6184 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs)); 6185 while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) { 6186 struct rb_node *n; 6187 struct orphan_dir_info *odi; 6188 6189 n = rb_first(&sctx->orphan_dirs); 6190 odi = rb_entry(n, struct orphan_dir_info, node); 6191 free_orphan_dir_info(sctx, odi); 6192 } 6193 6194 if (sort_clone_roots) { 6195 for (i = 0; i < sctx->clone_roots_cnt; i++) 6196 btrfs_root_dec_send_in_progress( 6197 sctx->clone_roots[i].root); 6198 } else { 6199 for (i = 0; sctx && i < clone_sources_to_rollback; i++) 6200 btrfs_root_dec_send_in_progress( 6201 sctx->clone_roots[i].root); 6202 6203 btrfs_root_dec_send_in_progress(send_root); 6204 } 6205 if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) 6206 btrfs_root_dec_send_in_progress(sctx->parent_root); 6207 6208 kfree(arg); 6209 vfree(clone_sources_tmp); 6210 6211 if (sctx) { 6212 if (sctx->send_filp) 6213 fput(sctx->send_filp); 6214 6215 vfree(sctx->clone_roots); 6216 vfree(sctx->send_buf); 6217 vfree(sctx->read_buf); 6218 6219 name_cache_free(sctx); 6220 6221 kfree(sctx); 6222 } 6223 6224 
return ret; 6225} 6226