This source file includes the following definitions:
- block_group_bits
- btrfs_add_excluded_extent
- btrfs_free_excluded_extents
- generic_ref_to_space_flags
- add_pinned_bytes
- sub_pinned_bytes
- btrfs_lookup_data_extent
- btrfs_lookup_extent_info
- btrfs_get_extent_inline_ref_type
- hash_extent_data_ref
- hash_extent_data_ref_item
- match_extent_data_ref
- lookup_extent_data_ref
- insert_extent_data_ref
- remove_extent_data_ref
- extent_data_ref_count
- lookup_tree_block_ref
- insert_tree_block_ref
- extent_ref_type
- find_next_key
- lookup_inline_extent_backref
- setup_inline_extent_backref
- lookup_extent_backref
- update_inline_extent_backref
- insert_inline_extent_backref
- insert_extent_backref
- remove_extent_backref
- btrfs_issue_discard
- btrfs_discard_extent
- btrfs_inc_extent_ref
- __btrfs_inc_extent_ref
- run_delayed_data_ref
- __run_delayed_extent_op
- run_delayed_extent_op
- run_delayed_tree_ref
- run_one_delayed_ref
- select_delayed_ref
- unselect_delayed_ref_head
- cleanup_extent_op
- run_and_cleanup_extent_op
- btrfs_cleanup_ref_head_accounting
- cleanup_ref_head
- btrfs_obtain_ref_head
- btrfs_run_delayed_refs_for_head
- __btrfs_run_delayed_refs
- find_middle
- heads_to_leaves
- btrfs_csum_bytes_to_leaves
- btrfs_run_delayed_refs
- btrfs_set_disk_extent_flags
- check_delayed_ref
- check_committed_ref
- btrfs_cross_ref_exist
- __btrfs_mod_ref
- btrfs_inc_ref
- btrfs_dec_ref
- btrfs_extent_readonly
- get_alloc_profile_by_root
- first_logical_byte
- pin_down_extent
- btrfs_pin_extent
- btrfs_pin_extent_for_log_replay
- __exclude_logged_extent
- btrfs_exclude_logged_extents
- btrfs_inc_block_group_reservations
- btrfs_prepare_extent_commit
- fetch_cluster_info
- unpin_extent_range
- btrfs_finish_extent_commit
- __btrfs_free_extent
- check_ref_cleanup
- btrfs_free_tree_block
- btrfs_free_extent
- btrfs_lock_block_group
- btrfs_grab_block_group
- btrfs_lock_cluster
- btrfs_release_block_group
- find_free_extent_clustered
- find_free_extent_unclustered
- find_free_extent_update_loop
- find_free_extent
- btrfs_reserve_extent
- __btrfs_free_reserved_extent
- btrfs_free_reserved_extent
- btrfs_free_and_pin_reserved_extent
- alloc_reserved_file_extent
- alloc_reserved_tree_block
- btrfs_alloc_reserved_file_extent
- btrfs_alloc_logged_file_extent
- btrfs_init_new_buffer
- btrfs_alloc_tree_block
- reada_walk_down
- walk_down_proc
- check_ref_exists
- do_walk_down
- walk_up_proc
- walk_down_tree
- walk_up_tree
- btrfs_drop_snapshot
- btrfs_drop_subtree
- btrfs_account_ro_block_groups_free_space
- btrfs_error_unpin_extent_range
- btrfs_trim_free_extents
- btrfs_trim_fs
- btrfs_end_write_no_snapshotting
- btrfs_start_write_no_snapshotting
- btrfs_wait_for_snapshot_creation
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2007 Oracle.  All rights reserved.
4  */
5
6 #include <linux/sched.h>
7 #include <linux/sched/signal.h>
8 #include <linux/pagemap.h>
9 #include <linux/writeback.h>
10 #include <linux/blkdev.h>
11 #include <linux/sort.h>
12 #include <linux/rcupdate.h>
13 #include <linux/kthread.h>
14 #include <linux/slab.h>
15 #include <linux/ratelimit.h>
16 #include <linux/percpu_counter.h>
17 #include <linux/lockdep.h>
18 #include <linux/crc32c.h>
19 #include "misc.h"
20 #include "tree-log.h"
21 #include "disk-io.h"
22 #include "print-tree.h"
23 #include "volumes.h"
24 #include "raid56.h"
25 #include "locking.h"
26 #include "free-space-cache.h"
27 #include "free-space-tree.h"
28 #include "sysfs.h"
29 #include "qgroup.h"
30 #include "ref-verify.h"
31 #include "space-info.h"
32 #include "block-rsv.h"
33 #include "delalloc-space.h"
34 #include "block-group.h"
35
36 #undef SCRAMBLE_DELAYED_REFS
37
38
39 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
40 struct btrfs_delayed_ref_node *node, u64 parent,
41 u64 root_objectid, u64 owner_objectid,
42 u64 owner_offset, int refs_to_drop,
43 struct btrfs_delayed_extent_op *extra_op);
44 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
45 struct extent_buffer *leaf,
46 struct btrfs_extent_item *ei);
47 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
48 u64 parent, u64 root_objectid,
49 u64 flags, u64 owner, u64 offset,
50 struct btrfs_key *ins, int ref_mod);
51 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
52 struct btrfs_delayed_ref_node *node,
53 struct btrfs_delayed_extent_op *extent_op);
54 static int find_next_key(struct btrfs_path *path, int level,
55 struct btrfs_key *key);
56
57 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
58 {
59 return (cache->flags & bits) == bits;
60 }
61
62 int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
63 u64 start, u64 num_bytes)
64 {
65 u64 end = start + num_bytes - 1;
66 set_extent_bits(&fs_info->freed_extents[0],
67 start, end, EXTENT_UPTODATE);
68 set_extent_bits(&fs_info->freed_extents[1],
69 start, end, EXTENT_UPTODATE);
70 return 0;
71 }
72
73 void btrfs_free_excluded_extents(struct btrfs_block_group_cache *cache)
74 {
75 struct btrfs_fs_info *fs_info = cache->fs_info;
76 u64 start, end;
77
78 start = cache->key.objectid;
79 end = start + cache->key.offset - 1;
80
81 clear_extent_bits(&fs_info->freed_extents[0],
82 start, end, EXTENT_UPTODATE);
83 clear_extent_bits(&fs_info->freed_extents[1],
84 start, end, EXTENT_UPTODATE);
85 }
86
87 static u64 generic_ref_to_space_flags(struct btrfs_ref *ref)
88 {
89 if (ref->type == BTRFS_REF_METADATA) {
90 if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
91 return BTRFS_BLOCK_GROUP_SYSTEM;
92 else
93 return BTRFS_BLOCK_GROUP_METADATA;
94 }
95 return BTRFS_BLOCK_GROUP_DATA;
96 }
97
98 static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
99 struct btrfs_ref *ref)
100 {
101 struct btrfs_space_info *space_info;
102 u64 flags = generic_ref_to_space_flags(ref);
103
104 space_info = btrfs_find_space_info(fs_info, flags);
105 ASSERT(space_info);
106 percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len,
107 BTRFS_TOTAL_BYTES_PINNED_BATCH);
108 }
109
110 static void sub_pinned_bytes(struct btrfs_fs_info *fs_info,
111 struct btrfs_ref *ref)
112 {
113 struct btrfs_space_info *space_info;
114 u64 flags = generic_ref_to_space_flags(ref);
115
116 space_info = btrfs_find_space_info(fs_info, flags);
117 ASSERT(space_info);
118 percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len,
119 BTRFS_TOTAL_BYTES_PINNED_BATCH);
120 }
121
122
123 int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
124 {
125 int ret;
126 struct btrfs_key key;
127 struct btrfs_path *path;
128
129 path = btrfs_alloc_path();
130 if (!path)
131 return -ENOMEM;
132
133 key.objectid = start;
134 key.offset = len;
135 key.type = BTRFS_EXTENT_ITEM_KEY;
136 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
137 btrfs_free_path(path);
138 return ret;
139 }
140
141
142 /*
143  * helper function to lookup reference count and flags of a tree block.
144  *
145  * the head node for delayed ref is used to store the sum of all the
146  * delayed ref's modifications in memory, so when walking backref and
147  * accessing the extent tree, it is not enough to only check the head
148  * node, we need to check the in-memory delayed refs as well.
149  */
150 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
151 struct btrfs_fs_info *fs_info, u64 bytenr,
152 u64 offset, int metadata, u64 *refs, u64 *flags)
153 {
154 struct btrfs_delayed_ref_head *head;
155 struct btrfs_delayed_ref_root *delayed_refs;
156 struct btrfs_path *path;
157 struct btrfs_extent_item *ei;
158 struct extent_buffer *leaf;
159 struct btrfs_key key;
160 u32 item_size;
161 u64 num_refs;
162 u64 extent_flags;
163 int ret;
164
165 /*
166  * If we don't have skinny metadata, don't bother doing anything
167  * different
168  */
169 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
170 offset = fs_info->nodesize;
171 metadata = 0;
172 }
173
174 path = btrfs_alloc_path();
175 if (!path)
176 return -ENOMEM;
177
178 if (!trans) {
179 path->skip_locking = 1;
180 path->search_commit_root = 1;
181 }
182
183 search_again:
184 key.objectid = bytenr;
185 key.offset = offset;
186 if (metadata)
187 key.type = BTRFS_METADATA_ITEM_KEY;
188 else
189 key.type = BTRFS_EXTENT_ITEM_KEY;
190
191 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
192 if (ret < 0)
193 goto out_free;
194
195 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
196 if (path->slots[0]) {
197 path->slots[0]--;
198 btrfs_item_key_to_cpu(path->nodes[0], &key,
199 path->slots[0]);
200 if (key.objectid == bytenr &&
201 key.type == BTRFS_EXTENT_ITEM_KEY &&
202 key.offset == fs_info->nodesize)
203 ret = 0;
204 }
205 }
206
207 if (ret == 0) {
208 leaf = path->nodes[0];
209 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
210 if (item_size >= sizeof(*ei)) {
211 ei = btrfs_item_ptr(leaf, path->slots[0],
212 struct btrfs_extent_item);
213 num_refs = btrfs_extent_refs(leaf, ei);
214 extent_flags = btrfs_extent_flags(leaf, ei);
215 } else {
216 ret = -EINVAL;
217 btrfs_print_v0_err(fs_info);
218 if (trans)
219 btrfs_abort_transaction(trans, ret);
220 else
221 btrfs_handle_fs_error(fs_info, ret, NULL);
222
223 goto out_free;
224 }
225
226 BUG_ON(num_refs == 0);
227 } else {
228 num_refs = 0;
229 extent_flags = 0;
230 ret = 0;
231 }
232
233 if (!trans)
234 goto out;
235
236 delayed_refs = &trans->transaction->delayed_refs;
237 spin_lock(&delayed_refs->lock);
238 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
239 if (head) {
240 if (!mutex_trylock(&head->mutex)) {
241 refcount_inc(&head->refs);
242 spin_unlock(&delayed_refs->lock);
243
244 btrfs_release_path(path);
245
246 /*
247  * Mutex was contended, block until it's released and try
248  * again
249  */
250 mutex_lock(&head->mutex);
251 mutex_unlock(&head->mutex);
252 btrfs_put_delayed_ref_head(head);
253 goto search_again;
254 }
255 spin_lock(&head->lock);
256 if (head->extent_op && head->extent_op->update_flags)
257 extent_flags |= head->extent_op->flags_to_set;
258 else
259 BUG_ON(num_refs == 0);
260
261 num_refs += head->ref_mod;
262 spin_unlock(&head->lock);
263 mutex_unlock(&head->mutex);
264 }
265 spin_unlock(&delayed_refs->lock);
266 out:
267 WARN_ON(num_refs == 0);
268 if (refs)
269 *refs = num_refs;
270 if (flags)
271 *flags = extent_flags;
272 out_free:
273 btrfs_free_path(path);
274 return ret;
275 }
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328 /*
329  * Back reference rules.  Back refs have three main goals:
330  *
331  * 1) differentiate between all holders of references to an extent so that
332  *    when a reference is dropped we can make sure it was a valid reference
333  *    before freeing the extent.
334  *
335  * 2) Provide enough information to quickly find the holders of an extent
336  *    if we notice a given block is corrupted or bad.
337  *
338  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
339  *    maintenance.  This is actually the same as #2, but with a slightly
340  *    different use case.
341  *
342  * File extents can be referenced by:
343  *
344  * - multiple snapshots, subvolumes, or different generations in one subvol
345  * - different files inside a single subvolume
346  * - different offsets inside a file (bookend extents in file.c)
347  *
348  * The extent ref structure for the implicit back refs has fields for:
349  *
350  * - Objectid of the subvolume root
351  * - objectid of the file holding the reference
352  * - original offset in the file
353  * - how many bookend extents
354  *
355  * The key offset for the implicit back refs is hash of the first
356  * three fields.
357  *
358  * The extent ref structure for the full back refs has field for:
359  *
360  * - number of pointers in the tree leaf
361  *
362  * The key offset for the full back refs is the first byte of
363  * the tree leaf
364  *
365  * When a file extent is allocated, the implicit back refs is used.
366  * the fields are filled in:
367  *
368  *     (root_key.objectid, inode objectid, offset in file, 1)
369  *
370  * When a file extent is removed by file truncation, we find the
371  * corresponding implicit back refs and check the following fields:
372  *
373  *     (btrfs_header_owner(leaf), inode objectid, offset in file)
374  *
375  * Btree extents can be referenced by:
376  *
377  * - Different subvolumes
378  *
379  * Both the implicit back refs and the full back refs for tree blocks
380  * only consist of key. The key offset for the implicit back refs is
381  * objectid of block's owner tree. The key offset for the full back refs
382  * is the first byte of parent block.
383  *
384  * When implicit back refs is used, information about the lowest key and
385  * level of the tree block are required. These information are stored in
386  * tree block info structure.
387  */
388 int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
389 struct btrfs_extent_inline_ref *iref,
390 enum btrfs_inline_ref_type is_data)
391 {
392 int type = btrfs_extent_inline_ref_type(eb, iref);
393 u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
394
395 if (type == BTRFS_TREE_BLOCK_REF_KEY ||
396 type == BTRFS_SHARED_BLOCK_REF_KEY ||
397 type == BTRFS_SHARED_DATA_REF_KEY ||
398 type == BTRFS_EXTENT_DATA_REF_KEY) {
399 if (is_data == BTRFS_REF_TYPE_BLOCK) {
400 if (type == BTRFS_TREE_BLOCK_REF_KEY)
401 return type;
402 if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
403 ASSERT(eb->fs_info);
404 /*
405  * Every shared one has parent tree
406  * block, which must be aligned to
407  * nodesize.
408  */
409 if (offset &&
410 IS_ALIGNED(offset, eb->fs_info->nodesize))
411 return type;
412 }
413 } else if (is_data == BTRFS_REF_TYPE_DATA) {
414 if (type == BTRFS_EXTENT_DATA_REF_KEY)
415 return type;
416 if (type == BTRFS_SHARED_DATA_REF_KEY) {
417 ASSERT(eb->fs_info);
418 /*
419  * Every shared one has parent tree
420  * block, which must be aligned to
421  * nodesize.
422  */
423 if (offset &&
424 IS_ALIGNED(offset, eb->fs_info->nodesize))
425 return type;
426 }
427 } else {
428 ASSERT(is_data == BTRFS_REF_TYPE_ANY);
429 return type;
430 }
431 }
432
433 btrfs_print_leaf((struct extent_buffer *)eb);
434 btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
435 eb->start, type);
436 WARN_ON(1);
437
438 return BTRFS_REF_TYPE_INVALID;
439 }
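/*
 * Summary of the acceptance rules above:
 *
 *   BTRFS_REF_TYPE_BLOCK: TREE_BLOCK_REF_KEY always; SHARED_BLOCK_REF_KEY
 *   only when the offset is a non-zero, nodesize-aligned parent bytenr.
 *   BTRFS_REF_TYPE_DATA:  EXTENT_DATA_REF_KEY always; SHARED_DATA_REF_KEY
 *   only when the offset is a non-zero, nodesize-aligned parent bytenr.
 *   BTRFS_REF_TYPE_ANY:   any of the four known types.
 *
 * Everything else falls through to BTRFS_REF_TYPE_INVALID.
 */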
440
441 u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
442 {
443 u32 high_crc = ~(u32)0;
444 u32 low_crc = ~(u32)0;
445 __le64 lenum;
446
447 lenum = cpu_to_le64(root_objectid);
448 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
449 lenum = cpu_to_le64(owner);
450 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
451 lenum = cpu_to_le64(offset);
452 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
453
454 return ((u64)high_crc << 31) ^ (u64)low_crc;
455 }
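/*
 * A minimal standalone sketch of the folding above. crc32c_sketch() is a
 * plain bitwise CRC-32C stand-in for btrfs_crc32c() (it is not a kernel
 * API), and the #if 0 keeps the illustration out of the build. Note the
 * 31-bit shift in the final fold: the top bit of high_crc deliberately
 * overlaps low_crc.
 */
#if 0
#include <stdint.h>
#include <stddef.h>

static uint32_t crc32c_sketch(uint32_t crc, const void *data, size_t len)
{
	const uint8_t *p = data;

	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return crc;
}

static uint64_t hash_extent_data_ref_sketch(uint64_t root_objectid,
					    uint64_t owner, uint64_t offset)
{
	/* On a little-endian host this matches the cpu_to_le64() above. */
	uint32_t high_crc = crc32c_sketch(~(uint32_t)0, &root_objectid,
					  sizeof(root_objectid));
	uint32_t low_crc = crc32c_sketch(~(uint32_t)0, &owner, sizeof(owner));

	low_crc = crc32c_sketch(low_crc, &offset, sizeof(offset));
	return ((uint64_t)high_crc << 31) ^ (uint64_t)low_crc;
}
#endif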
456
457 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
458 struct btrfs_extent_data_ref *ref)
459 {
460 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
461 btrfs_extent_data_ref_objectid(leaf, ref),
462 btrfs_extent_data_ref_offset(leaf, ref));
463 }
464
465 static int match_extent_data_ref(struct extent_buffer *leaf,
466 struct btrfs_extent_data_ref *ref,
467 u64 root_objectid, u64 owner, u64 offset)
468 {
469 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
470 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
471 btrfs_extent_data_ref_offset(leaf, ref) != offset)
472 return 0;
473 return 1;
474 }
475
476 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
477 struct btrfs_path *path,
478 u64 bytenr, u64 parent,
479 u64 root_objectid,
480 u64 owner, u64 offset)
481 {
482 struct btrfs_root *root = trans->fs_info->extent_root;
483 struct btrfs_key key;
484 struct btrfs_extent_data_ref *ref;
485 struct extent_buffer *leaf;
486 u32 nritems;
487 int ret;
488 int recow;
489 int err = -ENOENT;
490
491 key.objectid = bytenr;
492 if (parent) {
493 key.type = BTRFS_SHARED_DATA_REF_KEY;
494 key.offset = parent;
495 } else {
496 key.type = BTRFS_EXTENT_DATA_REF_KEY;
497 key.offset = hash_extent_data_ref(root_objectid,
498 owner, offset);
499 }
500 again:
501 recow = 0;
502 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
503 if (ret < 0) {
504 err = ret;
505 goto fail;
506 }
507
508 if (parent) {
509 if (!ret)
510 return 0;
511 goto fail;
512 }
513
514 leaf = path->nodes[0];
515 nritems = btrfs_header_nritems(leaf);
516 while (1) {
517 if (path->slots[0] >= nritems) {
518 ret = btrfs_next_leaf(root, path);
519 if (ret < 0)
520 err = ret;
521 if (ret)
522 goto fail;
523
524 leaf = path->nodes[0];
525 nritems = btrfs_header_nritems(leaf);
526 recow = 1;
527 }
528
529 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
530 if (key.objectid != bytenr ||
531 key.type != BTRFS_EXTENT_DATA_REF_KEY)
532 goto fail;
533
534 ref = btrfs_item_ptr(leaf, path->slots[0],
535 struct btrfs_extent_data_ref);
536
537 if (match_extent_data_ref(leaf, ref, root_objectid,
538 owner, offset)) {
539 if (recow) {
540 btrfs_release_path(path);
541 goto again;
542 }
543 err = 0;
544 break;
545 }
546 path->slots[0]++;
547 }
548 fail:
549 return err;
550 }
551
552 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
553 struct btrfs_path *path,
554 u64 bytenr, u64 parent,
555 u64 root_objectid, u64 owner,
556 u64 offset, int refs_to_add)
557 {
558 struct btrfs_root *root = trans->fs_info->extent_root;
559 struct btrfs_key key;
560 struct extent_buffer *leaf;
561 u32 size;
562 u32 num_refs;
563 int ret;
564
565 key.objectid = bytenr;
566 if (parent) {
567 key.type = BTRFS_SHARED_DATA_REF_KEY;
568 key.offset = parent;
569 size = sizeof(struct btrfs_shared_data_ref);
570 } else {
571 key.type = BTRFS_EXTENT_DATA_REF_KEY;
572 key.offset = hash_extent_data_ref(root_objectid,
573 owner, offset);
574 size = sizeof(struct btrfs_extent_data_ref);
575 }
576
577 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
578 if (ret && ret != -EEXIST)
579 goto fail;
580
581 leaf = path->nodes[0];
582 if (parent) {
583 struct btrfs_shared_data_ref *ref;
584 ref = btrfs_item_ptr(leaf, path->slots[0],
585 struct btrfs_shared_data_ref);
586 if (ret == 0) {
587 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
588 } else {
589 num_refs = btrfs_shared_data_ref_count(leaf, ref);
590 num_refs += refs_to_add;
591 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
592 }
593 } else {
594 struct btrfs_extent_data_ref *ref;
595 while (ret == -EEXIST) {
596 ref = btrfs_item_ptr(leaf, path->slots[0],
597 struct btrfs_extent_data_ref);
598 if (match_extent_data_ref(leaf, ref, root_objectid,
599 owner, offset))
600 break;
601 btrfs_release_path(path);
602 key.offset++;
603 ret = btrfs_insert_empty_item(trans, root, path, &key,
604 size);
605 if (ret && ret != -EEXIST)
606 goto fail;
607
608 leaf = path->nodes[0];
609 }
610 ref = btrfs_item_ptr(leaf, path->slots[0],
611 struct btrfs_extent_data_ref);
612 if (ret == 0) {
613 btrfs_set_extent_data_ref_root(leaf, ref,
614 root_objectid);
615 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
616 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
617 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
618 } else {
619 num_refs = btrfs_extent_data_ref_count(leaf, ref);
620 num_refs += refs_to_add;
621 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
622 }
623 }
624 btrfs_mark_buffer_dirty(leaf);
625 ret = 0;
626 fail:
627 btrfs_release_path(path);
628 return ret;
629 }
630
631 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
632 struct btrfs_path *path,
633 int refs_to_drop, int *last_ref)
634 {
635 struct btrfs_key key;
636 struct btrfs_extent_data_ref *ref1 = NULL;
637 struct btrfs_shared_data_ref *ref2 = NULL;
638 struct extent_buffer *leaf;
639 u32 num_refs = 0;
640 int ret = 0;
641
642 leaf = path->nodes[0];
643 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
644
645 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
646 ref1 = btrfs_item_ptr(leaf, path->slots[0],
647 struct btrfs_extent_data_ref);
648 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
649 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
650 ref2 = btrfs_item_ptr(leaf, path->slots[0],
651 struct btrfs_shared_data_ref);
652 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
653 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
654 btrfs_print_v0_err(trans->fs_info);
655 btrfs_abort_transaction(trans, -EINVAL);
656 return -EINVAL;
657 } else {
658 BUG();
659 }
660
661 BUG_ON(num_refs < refs_to_drop);
662 num_refs -= refs_to_drop;
663
664 if (num_refs == 0) {
665 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
666 *last_ref = 1;
667 } else {
668 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
669 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
670 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
671 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
672 btrfs_mark_buffer_dirty(leaf);
673 }
674 return ret;
675 }
676
677 static noinline u32 extent_data_ref_count(struct btrfs_path *path,
678 struct btrfs_extent_inline_ref *iref)
679 {
680 struct btrfs_key key;
681 struct extent_buffer *leaf;
682 struct btrfs_extent_data_ref *ref1;
683 struct btrfs_shared_data_ref *ref2;
684 u32 num_refs = 0;
685 int type;
686
687 leaf = path->nodes[0];
688 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
689
690 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
691 if (iref) {
692 /*
693  * If type is invalid, we should have bailed out earlier than
694  * this call.
695  */
696 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
697 ASSERT(type != BTRFS_REF_TYPE_INVALID);
698 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
699 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
700 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
701 } else {
702 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
703 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
704 }
705 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
706 ref1 = btrfs_item_ptr(leaf, path->slots[0],
707 struct btrfs_extent_data_ref);
708 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
709 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
710 ref2 = btrfs_item_ptr(leaf, path->slots[0],
711 struct btrfs_shared_data_ref);
712 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
713 } else {
714 WARN_ON(1);
715 }
716 return num_refs;
717 }
718
719 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
720 struct btrfs_path *path,
721 u64 bytenr, u64 parent,
722 u64 root_objectid)
723 {
724 struct btrfs_root *root = trans->fs_info->extent_root;
725 struct btrfs_key key;
726 int ret;
727
728 key.objectid = bytenr;
729 if (parent) {
730 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
731 key.offset = parent;
732 } else {
733 key.type = BTRFS_TREE_BLOCK_REF_KEY;
734 key.offset = root_objectid;
735 }
736
737 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
738 if (ret > 0)
739 ret = -ENOENT;
740 return ret;
741 }
742
743 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
744 struct btrfs_path *path,
745 u64 bytenr, u64 parent,
746 u64 root_objectid)
747 {
748 struct btrfs_key key;
749 int ret;
750
751 key.objectid = bytenr;
752 if (parent) {
753 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
754 key.offset = parent;
755 } else {
756 key.type = BTRFS_TREE_BLOCK_REF_KEY;
757 key.offset = root_objectid;
758 }
759
760 ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
761 path, &key, 0);
762 btrfs_release_path(path);
763 return ret;
764 }
765
766 static inline int extent_ref_type(u64 parent, u64 owner)
767 {
768 int type;
769 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
770 if (parent > 0)
771 type = BTRFS_SHARED_BLOCK_REF_KEY;
772 else
773 type = BTRFS_TREE_BLOCK_REF_KEY;
774 } else {
775 if (parent > 0)
776 type = BTRFS_SHARED_DATA_REF_KEY;
777 else
778 type = BTRFS_EXTENT_DATA_REF_KEY;
779 }
780 return type;
781 }
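/*
 * The same mapping in table form (tree blocks have
 * owner < BTRFS_FIRST_FREE_OBJECTID, data extents have owner >= it):
 *
 *                  parent > 0                   parent == 0
 *   tree block     BTRFS_SHARED_BLOCK_REF_KEY   BTRFS_TREE_BLOCK_REF_KEY
 *   data extent    BTRFS_SHARED_DATA_REF_KEY    BTRFS_EXTENT_DATA_REF_KEY
 */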
782
783 static int find_next_key(struct btrfs_path *path, int level,
784 struct btrfs_key *key)
785
786 {
787 for (; level < BTRFS_MAX_LEVEL; level++) {
788 if (!path->nodes[level])
789 break;
790 if (path->slots[level] + 1 >=
791 btrfs_header_nritems(path->nodes[level]))
792 continue;
793 if (level == 0)
794 btrfs_item_key_to_cpu(path->nodes[level], key,
795 path->slots[level] + 1);
796 else
797 btrfs_node_key_to_cpu(path->nodes[level], key,
798 path->slots[level] + 1);
799 return 0;
800 }
801 return 1;
802 }
803
804 /*
805  * look for inline back ref. if back ref is found, *ref_ret is set
806  * to the address of inline back ref, and 0 is returned.
807  *
808  * if back ref isn't found, *ref_ret is set to the address where it
809  * should be inserted, and -ENOENT is returned.
810  *
811  * if insert is true and there are too many inline back refs, the path
812  * points to the extent item, and -EAGAIN is returned.
813  *
814  * NOTE: inline back refs are ordered in the same way that back ref
815  *       items in the tree are ordered.
816  */
817 static noinline_for_stack
818 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
819 struct btrfs_path *path,
820 struct btrfs_extent_inline_ref **ref_ret,
821 u64 bytenr, u64 num_bytes,
822 u64 parent, u64 root_objectid,
823 u64 owner, u64 offset, int insert)
824 {
825 struct btrfs_fs_info *fs_info = trans->fs_info;
826 struct btrfs_root *root = fs_info->extent_root;
827 struct btrfs_key key;
828 struct extent_buffer *leaf;
829 struct btrfs_extent_item *ei;
830 struct btrfs_extent_inline_ref *iref;
831 u64 flags;
832 u64 item_size;
833 unsigned long ptr;
834 unsigned long end;
835 int extra_size;
836 int type;
837 int want;
838 int ret;
839 int err = 0;
840 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
841 int needed;
842
843 key.objectid = bytenr;
844 key.type = BTRFS_EXTENT_ITEM_KEY;
845 key.offset = num_bytes;
846
847 want = extent_ref_type(parent, owner);
848 if (insert) {
849 extra_size = btrfs_extent_inline_ref_size(want);
850 path->keep_locks = 1;
851 } else
852 extra_size = -1;
853
854 /*
855  * Owner is our level, so we can just add one to get the level for the
856  * block we are interested in.
857  */
858 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
859 key.type = BTRFS_METADATA_ITEM_KEY;
860 key.offset = owner;
861 }
862
863 again:
864 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
865 if (ret < 0) {
866 err = ret;
867 goto out;
868 }
869
870 /*
871  * We may be a newly converted file system which still has the old fat
872  * extent entries for metadata, so try and see if we have one of those.
873  */
874 if (ret > 0 && skinny_metadata) {
875 skinny_metadata = false;
876 if (path->slots[0]) {
877 path->slots[0]--;
878 btrfs_item_key_to_cpu(path->nodes[0], &key,
879 path->slots[0]);
880 if (key.objectid == bytenr &&
881 key.type == BTRFS_EXTENT_ITEM_KEY &&
882 key.offset == num_bytes)
883 ret = 0;
884 }
885 if (ret) {
886 key.objectid = bytenr;
887 key.type = BTRFS_EXTENT_ITEM_KEY;
888 key.offset = num_bytes;
889 btrfs_release_path(path);
890 goto again;
891 }
892 }
893
894 if (ret && !insert) {
895 err = -ENOENT;
896 goto out;
897 } else if (WARN_ON(ret)) {
898 err = -EIO;
899 goto out;
900 }
901
902 leaf = path->nodes[0];
903 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
904 if (unlikely(item_size < sizeof(*ei))) {
905 err = -EINVAL;
906 btrfs_print_v0_err(fs_info);
907 btrfs_abort_transaction(trans, err);
908 goto out;
909 }
910
911 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
912 flags = btrfs_extent_flags(leaf, ei);
913
914 ptr = (unsigned long)(ei + 1);
915 end = (unsigned long)ei + item_size;
916
917 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
918 ptr += sizeof(struct btrfs_tree_block_info);
919 BUG_ON(ptr > end);
920 }
921
922 if (owner >= BTRFS_FIRST_FREE_OBJECTID)
923 needed = BTRFS_REF_TYPE_DATA;
924 else
925 needed = BTRFS_REF_TYPE_BLOCK;
926
927 err = -ENOENT;
928 while (1) {
929 if (ptr >= end) {
930 WARN_ON(ptr > end);
931 break;
932 }
933 iref = (struct btrfs_extent_inline_ref *)ptr;
934 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
935 if (type == BTRFS_REF_TYPE_INVALID) {
936 err = -EUCLEAN;
937 goto out;
938 }
939
940 if (want < type)
941 break;
942 if (want > type) {
943 ptr += btrfs_extent_inline_ref_size(type);
944 continue;
945 }
946
947 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
948 struct btrfs_extent_data_ref *dref;
949 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
950 if (match_extent_data_ref(leaf, dref, root_objectid,
951 owner, offset)) {
952 err = 0;
953 break;
954 }
955 if (hash_extent_data_ref_item(leaf, dref) <
956 hash_extent_data_ref(root_objectid, owner, offset))
957 break;
958 } else {
959 u64 ref_offset;
960 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
961 if (parent > 0) {
962 if (parent == ref_offset) {
963 err = 0;
964 break;
965 }
966 if (ref_offset < parent)
967 break;
968 } else {
969 if (root_objectid == ref_offset) {
970 err = 0;
971 break;
972 }
973 if (ref_offset < root_objectid)
974 break;
975 }
976 }
977 ptr += btrfs_extent_inline_ref_size(type);
978 }
979 if (err == -ENOENT && insert) {
980 if (item_size + extra_size >=
981 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
982 err = -EAGAIN;
983 goto out;
984 }
985 /*
986  * To add new inline back ref, we have to make sure
987  * there is no corresponding back ref item.
988  * For simplicity, we just do not add new inline back
989  * ref if there is any kind of item for this block
990  */
991 if (find_next_key(path, 0, &key) == 0 &&
992 key.objectid == bytenr &&
993 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
994 err = -EAGAIN;
995 goto out;
996 }
997 }
998 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
999 out:
1000 if (insert) {
1001 path->keep_locks = 0;
1002 btrfs_unlock_up_safe(path, 1);
1003 }
1004 return err;
1005 }
1006
1007 /*
1008  * helper to add new inline back ref
1009  */
1010 static noinline_for_stack
1011 void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
1012 struct btrfs_path *path,
1013 struct btrfs_extent_inline_ref *iref,
1014 u64 parent, u64 root_objectid,
1015 u64 owner, u64 offset, int refs_to_add,
1016 struct btrfs_delayed_extent_op *extent_op)
1017 {
1018 struct extent_buffer *leaf;
1019 struct btrfs_extent_item *ei;
1020 unsigned long ptr;
1021 unsigned long end;
1022 unsigned long item_offset;
1023 u64 refs;
1024 int size;
1025 int type;
1026
1027 leaf = path->nodes[0];
1028 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1029 item_offset = (unsigned long)iref - (unsigned long)ei;
1030
1031 type = extent_ref_type(parent, owner);
1032 size = btrfs_extent_inline_ref_size(type);
1033
1034 btrfs_extend_item(path, size);
1035
1036 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1037 refs = btrfs_extent_refs(leaf, ei);
1038 refs += refs_to_add;
1039 btrfs_set_extent_refs(leaf, ei, refs);
1040 if (extent_op)
1041 __run_delayed_extent_op(extent_op, leaf, ei);
1042
1043 ptr = (unsigned long)ei + item_offset;
1044 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1045 if (ptr < end - size)
1046 memmove_extent_buffer(leaf, ptr + size, ptr,
1047 end - size - ptr);
1048
1049 iref = (struct btrfs_extent_inline_ref *)ptr;
1050 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1051 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1052 struct btrfs_extent_data_ref *dref;
1053 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1054 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1055 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1056 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1057 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1058 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1059 struct btrfs_shared_data_ref *sref;
1060 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1061 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1062 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1063 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1064 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1065 } else {
1066 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1067 }
1068 btrfs_mark_buffer_dirty(leaf);
1069 }
1070
1071 static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1072 struct btrfs_path *path,
1073 struct btrfs_extent_inline_ref **ref_ret,
1074 u64 bytenr, u64 num_bytes, u64 parent,
1075 u64 root_objectid, u64 owner, u64 offset)
1076 {
1077 int ret;
1078
1079 ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
1080 num_bytes, parent, root_objectid,
1081 owner, offset, 0);
1082 if (ret != -ENOENT)
1083 return ret;
1084
1085 btrfs_release_path(path);
1086 *ref_ret = NULL;
1087
1088 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1089 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
1090 root_objectid);
1091 } else {
1092 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
1093 root_objectid, owner, offset);
1094 }
1095 return ret;
1096 }
1097
1098 /*
1099  * helper to update/remove inline back ref
1100  */
1101 static noinline_for_stack
1102 void update_inline_extent_backref(struct btrfs_path *path,
1103 struct btrfs_extent_inline_ref *iref,
1104 int refs_to_mod,
1105 struct btrfs_delayed_extent_op *extent_op,
1106 int *last_ref)
1107 {
1108 struct extent_buffer *leaf = path->nodes[0];
1109 struct btrfs_extent_item *ei;
1110 struct btrfs_extent_data_ref *dref = NULL;
1111 struct btrfs_shared_data_ref *sref = NULL;
1112 unsigned long ptr;
1113 unsigned long end;
1114 u32 item_size;
1115 int size;
1116 int type;
1117 u64 refs;
1118
1119 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1120 refs = btrfs_extent_refs(leaf, ei);
1121 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1122 refs += refs_to_mod;
1123 btrfs_set_extent_refs(leaf, ei, refs);
1124 if (extent_op)
1125 __run_delayed_extent_op(extent_op, leaf, ei);
1126
1127 /*
1128  * If type is invalid, we should have bailed out after
1129  * lookup_inline_extent_backref().
1130  */
1131 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
1132 ASSERT(type != BTRFS_REF_TYPE_INVALID);
1133
1134 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1135 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1136 refs = btrfs_extent_data_ref_count(leaf, dref);
1137 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1138 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1139 refs = btrfs_shared_data_ref_count(leaf, sref);
1140 } else {
1141 refs = 1;
1142 BUG_ON(refs_to_mod != -1);
1143 }
1144
1145 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1146 refs += refs_to_mod;
1147
1148 if (refs > 0) {
1149 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1150 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1151 else
1152 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1153 } else {
1154 *last_ref = 1;
1155 size = btrfs_extent_inline_ref_size(type);
1156 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1157 ptr = (unsigned long)iref;
1158 end = (unsigned long)ei + item_size;
1159 if (ptr + size < end)
1160 memmove_extent_buffer(leaf, ptr, ptr + size,
1161 end - ptr - size);
1162 item_size -= size;
1163 btrfs_truncate_item(path, item_size, 1);
1164 }
1165 btrfs_mark_buffer_dirty(leaf);
1166 }
1167
1168 static noinline_for_stack
1169 int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1170 struct btrfs_path *path,
1171 u64 bytenr, u64 num_bytes, u64 parent,
1172 u64 root_objectid, u64 owner,
1173 u64 offset, int refs_to_add,
1174 struct btrfs_delayed_extent_op *extent_op)
1175 {
1176 struct btrfs_extent_inline_ref *iref;
1177 int ret;
1178
1179 ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
1180 num_bytes, parent, root_objectid,
1181 owner, offset, 1);
1182 if (ret == 0) {
1183 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1184 update_inline_extent_backref(path, iref, refs_to_add,
1185 extent_op, NULL);
1186 } else if (ret == -ENOENT) {
1187 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
1188 root_objectid, owner, offset,
1189 refs_to_add, extent_op);
1190 ret = 0;
1191 }
1192 return ret;
1193 }
1194
1195 static int insert_extent_backref(struct btrfs_trans_handle *trans,
1196 struct btrfs_path *path,
1197 u64 bytenr, u64 parent, u64 root_objectid,
1198 u64 owner, u64 offset, int refs_to_add)
1199 {
1200 int ret;
1201 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1202 BUG_ON(refs_to_add != 1);
1203 ret = insert_tree_block_ref(trans, path, bytenr, parent,
1204 root_objectid);
1205 } else {
1206 ret = insert_extent_data_ref(trans, path, bytenr, parent,
1207 root_objectid, owner, offset,
1208 refs_to_add);
1209 }
1210 return ret;
1211 }
1212
1213 static int remove_extent_backref(struct btrfs_trans_handle *trans,
1214 struct btrfs_path *path,
1215 struct btrfs_extent_inline_ref *iref,
1216 int refs_to_drop, int is_data, int *last_ref)
1217 {
1218 int ret = 0;
1219
1220 BUG_ON(!is_data && refs_to_drop != 1);
1221 if (iref) {
1222 update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
1223 last_ref);
1224 } else if (is_data) {
1225 ret = remove_extent_data_ref(trans, path, refs_to_drop,
1226 last_ref);
1227 } else {
1228 *last_ref = 1;
1229 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
1230 }
1231 return ret;
1232 }
1233
1234 static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
1235 u64 *discarded_bytes)
1236 {
1237 int j, ret = 0;
1238 u64 bytes_left, end;
1239 u64 aligned_start = ALIGN(start, 1 << 9);
1240
1241 if (WARN_ON(start != aligned_start)) {
1242 len -= aligned_start - start;
1243 len = round_down(len, 1 << 9);
1244 start = aligned_start;
1245 }
1246
1247 *discarded_bytes = 0;
1248
1249 if (!len)
1250 return 0;
1251
1252 end = start + len;
1253 bytes_left = len;
1254
1255 /* Skip any superblocks on this device. */
1256 for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
1257 u64 sb_start = btrfs_sb_offset(j);
1258 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
1259 u64 size = sb_start - start;
1260
1261 if (!in_range(sb_start, start, bytes_left) &&
1262 !in_range(sb_end, start, bytes_left) &&
1263 !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
1264 continue;
1265
1266 /*
1267  * Superblock spans beginning of range.  Adjust start and
1268  * submit.
1269  */
1270 if (sb_start <= start) {
1271 start += sb_end - start;
1272 if (start > end) {
1273 bytes_left = 0;
1274 break;
1275 }
1276 bytes_left = end - start;
1277 continue;
1278 }
1279
1280 if (size) {
1281 ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
1282 GFP_NOFS, 0);
1283 if (!ret)
1284 *discarded_bytes += size;
1285 else if (ret != -EOPNOTSUPP)
1286 return ret;
1287 }
1288
1289 start = sb_end;
1290 if (start > end) {
1291 bytes_left = 0;
1292 break;
1293 }
1294 bytes_left = end - start;
1295 }
1296
1297 if (bytes_left) {
1298 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
1299 GFP_NOFS, 0);
1300 if (!ret)
1301 *discarded_bytes += bytes_left;
1302 }
1303 return ret;
1304 }
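/*
 * For scale: btrfs keeps its primary superblock at 64KiB with mirror
 * copies at 64MiB and 256GiB (see btrfs_sb_offset()), each
 * BTRFS_SUPER_INFO_SIZE bytes long. A discard of 0..128MiB is thus
 * issued as three pieces: up to 64KiB, from the end of the primary
 * super to 64MiB, and from the end of the first mirror to 128MiB.
 */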
1305
1306 int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
1307 u64 num_bytes, u64 *actual_bytes)
1308 {
1309 int ret;
1310 u64 discarded_bytes = 0;
1311 struct btrfs_bio *bbio = NULL;
1312
1313 /*
1314  * Avoid races with device replace and make sure our bbio has devices
1315  * associated to its stripes that don't go away while we are
1316  * discarding.
1317  */
1318 btrfs_bio_counter_inc_blocked(fs_info);
1319
1320 ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, bytenr, &num_bytes,
1321 &bbio, 0);
1322
1323 if (!ret) {
1324 struct btrfs_bio_stripe *stripe = bbio->stripes;
1325 int i;
1326
1327
1328 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
1329 u64 bytes;
1330 struct request_queue *req_q;
1331
1332 if (!stripe->dev->bdev) {
1333 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
1334 continue;
1335 }
1336 req_q = bdev_get_queue(stripe->dev->bdev);
1337 if (!blk_queue_discard(req_q))
1338 continue;
1339
1340 ret = btrfs_issue_discard(stripe->dev->bdev,
1341 stripe->physical,
1342 stripe->length,
1343 &bytes);
1344 if (!ret)
1345 discarded_bytes += bytes;
1346 else if (ret != -EOPNOTSUPP)
1347 break;
1348
1349 /*
1350  * Just in case we get back EOPNOTSUPP for some reason,
1351  * just ignore the return value so we don't screw up
1352  * people calling discard_extent.
1353  */
1354 ret = 0;
1355 }
1356 btrfs_put_bbio(bbio);
1357 }
1358 btrfs_bio_counter_dec(fs_info);
1359
1360 if (actual_bytes)
1361 *actual_bytes = discarded_bytes;
1362
1363
1364 if (ret == -EOPNOTSUPP)
1365 ret = 0;
1366 return ret;
1367 }
1368
1369 /* Can return -ENOMEM */
1370 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1371 struct btrfs_ref *generic_ref)
1372 {
1373 struct btrfs_fs_info *fs_info = trans->fs_info;
1374 int old_ref_mod, new_ref_mod;
1375 int ret;
1376
1377 ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
1378 generic_ref->action);
1379 BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
1380 generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
1381
1382 if (generic_ref->type == BTRFS_REF_METADATA)
1383 ret = btrfs_add_delayed_tree_ref(trans, generic_ref,
1384 NULL, &old_ref_mod, &new_ref_mod);
1385 else
1386 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0,
1387 &old_ref_mod, &new_ref_mod);
1388
1389 btrfs_ref_tree_mod(fs_info, generic_ref);
1390
1391 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
1392 sub_pinned_bytes(fs_info, generic_ref);
1393
1394 return ret;
1395 }
1396
1397 /*
1398  * __btrfs_inc_extent_ref - insert backreference for a given extent
1399  *
1400  * @trans:          Handle of transaction
1401  *
1402  * @node:           The delayed ref node used to get the bytenr/length for
1403  *                  extent whose references are incremented.
1404  *
1405  * @parent:         If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
1406  *                  BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
1407  *                  bytenr of the parent block. Since new extents are always
1408  *                  created with indirect references, this will only be the case
1409  *                  when relocating a shared extent. In that case, root_objectid
1410  *                  will be BTRFS_TREE_RELOC_OBJECTID. Otherwise, parent must
1411  *                  be 0
1412  *
1413  * @root_objectid:  The id of the root where this modification has originated,
1414  *                  this can be either one of the well-known metadata trees or
1415  *                  the subvolume id which references this extent.
1416  *
1417  * @owner:          For data extents it is the inode number of the owning file.
1418  *                  For metadata extents this parameter holds the level in the
1419  *                  tree of the extent.
1420  *
1421  * @offset:         For metadata extents the offset is ignored and is currently
1422  *                  always 0. For data extents it is the fileoffset of this
1423  *                  extent.
1424  *
1425  * @refs_to_add     Number of references to add
1426  *
1427  * @extent_op       Pointer to a structure, holding information necessary when
1428  *                  updating a tree block's flags
1429  *
1430  */
1431 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1432 struct btrfs_delayed_ref_node *node,
1433 u64 parent, u64 root_objectid,
1434 u64 owner, u64 offset, int refs_to_add,
1435 struct btrfs_delayed_extent_op *extent_op)
1436 {
1437 struct btrfs_path *path;
1438 struct extent_buffer *leaf;
1439 struct btrfs_extent_item *item;
1440 struct btrfs_key key;
1441 u64 bytenr = node->bytenr;
1442 u64 num_bytes = node->num_bytes;
1443 u64 refs;
1444 int ret;
1445
1446 path = btrfs_alloc_path();
1447 if (!path)
1448 return -ENOMEM;
1449
1450 path->reada = READA_FORWARD;
1451 path->leave_spinning = 1;
1452
1453 ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
1454 parent, root_objectid, owner,
1455 offset, refs_to_add, extent_op);
1456 if ((ret < 0 && ret != -EAGAIN) || !ret)
1457 goto out;
1458
1459 /*
1460  * Ok we had -EAGAIN which means we didn't have space to insert an
1461  * inline extent ref, so just update the reference count and add a
1462  * normal backref.
1463  */
1464 leaf = path->nodes[0];
1465 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1466 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1467 refs = btrfs_extent_refs(leaf, item);
1468 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
1469 if (extent_op)
1470 __run_delayed_extent_op(extent_op, leaf, item);
1471
1472 btrfs_mark_buffer_dirty(leaf);
1473 btrfs_release_path(path);
1474
1475 path->reada = READA_FORWARD;
1476 path->leave_spinning = 1;
1477 /* now insert the actual backref */
1478 ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid,
1479 owner, offset, refs_to_add);
1480 if (ret)
1481 btrfs_abort_transaction(trans, ret);
1482 out:
1483 btrfs_free_path(path);
1484 return ret;
1485 }
1486
1487 static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1488 struct btrfs_delayed_ref_node *node,
1489 struct btrfs_delayed_extent_op *extent_op,
1490 int insert_reserved)
1491 {
1492 int ret = 0;
1493 struct btrfs_delayed_data_ref *ref;
1494 struct btrfs_key ins;
1495 u64 parent = 0;
1496 u64 ref_root = 0;
1497 u64 flags = 0;
1498
1499 ins.objectid = node->bytenr;
1500 ins.offset = node->num_bytes;
1501 ins.type = BTRFS_EXTENT_ITEM_KEY;
1502
1503 ref = btrfs_delayed_node_to_data_ref(node);
1504 trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
1505
1506 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1507 parent = ref->parent;
1508 ref_root = ref->root;
1509
1510 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1511 if (extent_op)
1512 flags |= extent_op->flags_to_set;
1513 ret = alloc_reserved_file_extent(trans, parent, ref_root,
1514 flags, ref->objectid,
1515 ref->offset, &ins,
1516 node->ref_mod);
1517 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1518 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1519 ref->objectid, ref->offset,
1520 node->ref_mod, extent_op);
1521 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1522 ret = __btrfs_free_extent(trans, node, parent,
1523 ref_root, ref->objectid,
1524 ref->offset, node->ref_mod,
1525 extent_op);
1526 } else {
1527 BUG();
1528 }
1529 return ret;
1530 }
1531
1532 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
1533 struct extent_buffer *leaf,
1534 struct btrfs_extent_item *ei)
1535 {
1536 u64 flags = btrfs_extent_flags(leaf, ei);
1537 if (extent_op->update_flags) {
1538 flags |= extent_op->flags_to_set;
1539 btrfs_set_extent_flags(leaf, ei, flags);
1540 }
1541
1542 if (extent_op->update_key) {
1543 struct btrfs_tree_block_info *bi;
1544 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1545 bi = (struct btrfs_tree_block_info *)(ei + 1);
1546 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1547 }
1548 }
1549
1550 static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1551 struct btrfs_delayed_ref_head *head,
1552 struct btrfs_delayed_extent_op *extent_op)
1553 {
1554 struct btrfs_fs_info *fs_info = trans->fs_info;
1555 struct btrfs_key key;
1556 struct btrfs_path *path;
1557 struct btrfs_extent_item *ei;
1558 struct extent_buffer *leaf;
1559 u32 item_size;
1560 int ret;
1561 int err = 0;
1562 int metadata = !extent_op->is_data;
1563
1564 if (trans->aborted)
1565 return 0;
1566
1567 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
1568 metadata = 0;
1569
1570 path = btrfs_alloc_path();
1571 if (!path)
1572 return -ENOMEM;
1573
1574 key.objectid = head->bytenr;
1575
1576 if (metadata) {
1577 key.type = BTRFS_METADATA_ITEM_KEY;
1578 key.offset = extent_op->level;
1579 } else {
1580 key.type = BTRFS_EXTENT_ITEM_KEY;
1581 key.offset = head->num_bytes;
1582 }
1583
1584 again:
1585 path->reada = READA_FORWARD;
1586 path->leave_spinning = 1;
1587 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
1588 if (ret < 0) {
1589 err = ret;
1590 goto out;
1591 }
1592 if (ret > 0) {
1593 if (metadata) {
1594 if (path->slots[0] > 0) {
1595 path->slots[0]--;
1596 btrfs_item_key_to_cpu(path->nodes[0], &key,
1597 path->slots[0]);
1598 if (key.objectid == head->bytenr &&
1599 key.type == BTRFS_EXTENT_ITEM_KEY &&
1600 key.offset == head->num_bytes)
1601 ret = 0;
1602 }
1603 if (ret > 0) {
1604 btrfs_release_path(path);
1605 metadata = 0;
1606
1607 key.objectid = head->bytenr;
1608 key.offset = head->num_bytes;
1609 key.type = BTRFS_EXTENT_ITEM_KEY;
1610 goto again;
1611 }
1612 } else {
1613 err = -EIO;
1614 goto out;
1615 }
1616 }
1617
1618 leaf = path->nodes[0];
1619 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1620
1621 if (unlikely(item_size < sizeof(*ei))) {
1622 err = -EINVAL;
1623 btrfs_print_v0_err(fs_info);
1624 btrfs_abort_transaction(trans, err);
1625 goto out;
1626 }
1627
1628 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1629 __run_delayed_extent_op(extent_op, leaf, ei);
1630
1631 btrfs_mark_buffer_dirty(leaf);
1632 out:
1633 btrfs_free_path(path);
1634 return err;
1635 }
1636
1637 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1638 struct btrfs_delayed_ref_node *node,
1639 struct btrfs_delayed_extent_op *extent_op,
1640 int insert_reserved)
1641 {
1642 int ret = 0;
1643 struct btrfs_delayed_tree_ref *ref;
1644 u64 parent = 0;
1645 u64 ref_root = 0;
1646
1647 ref = btrfs_delayed_node_to_tree_ref(node);
1648 trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
1649
1650 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1651 parent = ref->parent;
1652 ref_root = ref->root;
1653
1654 if (node->ref_mod != 1) {
1655 btrfs_err(trans->fs_info,
1656 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
1657 node->bytenr, node->ref_mod, node->action, ref_root,
1658 parent);
1659 return -EIO;
1660 }
1661 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1662 BUG_ON(!extent_op || !extent_op->update_flags);
1663 ret = alloc_reserved_tree_block(trans, node, extent_op);
1664 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1665 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1666 ref->level, 0, 1, extent_op);
1667 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1668 ret = __btrfs_free_extent(trans, node, parent, ref_root,
1669 ref->level, 0, 1, extent_op);
1670 } else {
1671 BUG();
1672 }
1673 return ret;
1674 }
1675
1676 /* helper function to actually process a single delayed ref entry */
1677 static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1678 struct btrfs_delayed_ref_node *node,
1679 struct btrfs_delayed_extent_op *extent_op,
1680 int insert_reserved)
1681 {
1682 int ret = 0;
1683
1684 if (trans->aborted) {
1685 if (insert_reserved)
1686 btrfs_pin_extent(trans->fs_info, node->bytenr,
1687 node->num_bytes, 1);
1688 return 0;
1689 }
1690
1691 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1692 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1693 ret = run_delayed_tree_ref(trans, node, extent_op,
1694 insert_reserved);
1695 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1696 node->type == BTRFS_SHARED_DATA_REF_KEY)
1697 ret = run_delayed_data_ref(trans, node, extent_op,
1698 insert_reserved);
1699 else
1700 BUG();
1701 if (ret && insert_reserved)
1702 btrfs_pin_extent(trans->fs_info, node->bytenr,
1703 node->num_bytes, 1);
1704 return ret;
1705 }
1706
1707 static inline struct btrfs_delayed_ref_node *
1708 select_delayed_ref(struct btrfs_delayed_ref_head *head)
1709 {
1710 struct btrfs_delayed_ref_node *ref;
1711
1712 if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
1713 return NULL;
1714
1715 /*
1716  * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
1717  * This is to prevent a ref count from going down to zero, which deletes
1718  * the extent item from the extent tree, when there still are references
1719  * to add, which would fail because they would not find the extent item.
1720  */
1721 if (!list_empty(&head->ref_add_list))
1722 return list_first_entry(&head->ref_add_list,
1723 struct btrfs_delayed_ref_node, add_list);
1724
1725 ref = rb_entry(rb_first_cached(&head->ref_tree),
1726 struct btrfs_delayed_ref_node, ref_node);
1727 ASSERT(list_empty(&ref->add_list));
1728 return ref;
1729 }
1730
1731 static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
1732 struct btrfs_delayed_ref_head *head)
1733 {
1734 spin_lock(&delayed_refs->lock);
1735 head->processing = 0;
1736 delayed_refs->num_heads_ready++;
1737 spin_unlock(&delayed_refs->lock);
1738 btrfs_delayed_ref_unlock(head);
1739 }
1740
1741 static struct btrfs_delayed_extent_op *cleanup_extent_op(
1742 struct btrfs_delayed_ref_head *head)
1743 {
1744 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
1745
1746 if (!extent_op)
1747 return NULL;
1748
1749 if (head->must_insert_reserved) {
1750 head->extent_op = NULL;
1751 btrfs_free_delayed_extent_op(extent_op);
1752 return NULL;
1753 }
1754 return extent_op;
1755 }
1756
1757 static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
1758 struct btrfs_delayed_ref_head *head)
1759 {
1760 struct btrfs_delayed_extent_op *extent_op;
1761 int ret;
1762
1763 extent_op = cleanup_extent_op(head);
1764 if (!extent_op)
1765 return 0;
1766 head->extent_op = NULL;
1767 spin_unlock(&head->lock);
1768 ret = run_delayed_extent_op(trans, head, extent_op);
1769 btrfs_free_delayed_extent_op(extent_op);
1770 return ret ? ret : 1;
1771 }
1772
1773 void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
1774 struct btrfs_delayed_ref_root *delayed_refs,
1775 struct btrfs_delayed_ref_head *head)
1776 {
1777 int nr_items = 1;
1778
1779 if (head->total_ref_mod < 0) {
1780 struct btrfs_space_info *space_info;
1781 u64 flags;
1782
1783 if (head->is_data)
1784 flags = BTRFS_BLOCK_GROUP_DATA;
1785 else if (head->is_system)
1786 flags = BTRFS_BLOCK_GROUP_SYSTEM;
1787 else
1788 flags = BTRFS_BLOCK_GROUP_METADATA;
1789 space_info = btrfs_find_space_info(fs_info, flags);
1790 ASSERT(space_info);
1791 percpu_counter_add_batch(&space_info->total_bytes_pinned,
1792 -head->num_bytes,
1793 BTRFS_TOTAL_BYTES_PINNED_BATCH);
1794
1795
1796
1797
1798
1799
1800 if (head->is_data) {
1801 spin_lock(&delayed_refs->lock);
1802 delayed_refs->pending_csums -= head->num_bytes;
1803 spin_unlock(&delayed_refs->lock);
1804 nr_items += btrfs_csum_bytes_to_leaves(fs_info,
1805 head->num_bytes);
1806 }
1807 }
1808
1809 btrfs_delayed_refs_rsv_release(fs_info, nr_items);
1810 }
1811
1812 static int cleanup_ref_head(struct btrfs_trans_handle *trans,
1813 struct btrfs_delayed_ref_head *head)
1814 {
1815
1816 struct btrfs_fs_info *fs_info = trans->fs_info;
1817 struct btrfs_delayed_ref_root *delayed_refs;
1818 int ret;
1819
1820 delayed_refs = &trans->transaction->delayed_refs;
1821
1822 ret = run_and_cleanup_extent_op(trans, head);
1823 if (ret < 0) {
1824 unselect_delayed_ref_head(delayed_refs, head);
1825 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
1826 return ret;
1827 } else if (ret) {
1828 return ret;
1829 }
1830
1831 /*
1832  * Need to drop our head ref lock and re-acquire the delayed ref lock
1833  * and then re-check to make sure nobody got added.
1834  */
1835 spin_unlock(&head->lock);
1836 spin_lock(&delayed_refs->lock);
1837 spin_lock(&head->lock);
1838 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
1839 spin_unlock(&head->lock);
1840 spin_unlock(&delayed_refs->lock);
1841 return 1;
1842 }
1843 btrfs_delete_ref_head(delayed_refs, head);
1844 spin_unlock(&head->lock);
1845 spin_unlock(&delayed_refs->lock);
1846
1847 if (head->must_insert_reserved) {
1848 btrfs_pin_extent(fs_info, head->bytenr,
1849 head->num_bytes, 1);
1850 if (head->is_data) {
1851 ret = btrfs_del_csums(trans, fs_info->csum_root,
1852 head->bytenr, head->num_bytes);
1853 }
1854 }
1855
1856 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
1857
1858 trace_run_delayed_ref_head(fs_info, head, 0);
1859 btrfs_delayed_ref_unlock(head);
1860 btrfs_put_delayed_ref_head(head);
1861 return 0;
1862 }
1863
1864 static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
1865 struct btrfs_trans_handle *trans)
1866 {
1867 struct btrfs_delayed_ref_root *delayed_refs =
1868 &trans->transaction->delayed_refs;
1869 struct btrfs_delayed_ref_head *head = NULL;
1870 int ret;
1871
1872 spin_lock(&delayed_refs->lock);
1873 head = btrfs_select_ref_head(delayed_refs);
1874 if (!head) {
1875 spin_unlock(&delayed_refs->lock);
1876 return head;
1877 }
1878
1879 /*
1880  * Grab the lock that says we are going to process all the refs for
1881  * this head
1882  */
1883 ret = btrfs_delayed_ref_lock(delayed_refs, head);
1884 spin_unlock(&delayed_refs->lock);
1885
1886 /*
1887  * We may have dropped the spin lock to get the head mutex lock, and
1888  * that might have given someone else time to free the head.  If that's
1889  * true, it has been removed from our list and we can move on.
1890  */
1891 if (ret == -EAGAIN)
1892 head = ERR_PTR(-EAGAIN);
1893
1894 return head;
1895 }
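/*
 * btrfs_obtain_ref_head() therefore has three outcomes the caller must
 * tell apart: NULL when no head is ready, ERR_PTR(-EAGAIN) when the
 * selected head was contended and another one should be tried, and
 * otherwise a head that is locked and ready to be processed.
 */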
1896
1897 static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
1898 struct btrfs_delayed_ref_head *locked_ref,
1899 unsigned long *run_refs)
1900 {
1901 struct btrfs_fs_info *fs_info = trans->fs_info;
1902 struct btrfs_delayed_ref_root *delayed_refs;
1903 struct btrfs_delayed_extent_op *extent_op;
1904 struct btrfs_delayed_ref_node *ref;
1905 int must_insert_reserved = 0;
1906 int ret;
1907
1908 delayed_refs = &trans->transaction->delayed_refs;
1909
1910 lockdep_assert_held(&locked_ref->mutex);
1911 lockdep_assert_held(&locked_ref->lock);
1912
1913 while ((ref = select_delayed_ref(locked_ref))) {
1914 if (ref->seq &&
1915 btrfs_check_delayed_seq(fs_info, ref->seq)) {
1916 spin_unlock(&locked_ref->lock);
1917 unselect_delayed_ref_head(delayed_refs, locked_ref);
1918 return -EAGAIN;
1919 }
1920
1921 (*run_refs)++;
1922 ref->in_tree = 0;
1923 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
1924 RB_CLEAR_NODE(&ref->ref_node);
1925 if (!list_empty(&ref->add_list))
1926 list_del(&ref->add_list);
1927 /*
1928  * When we play the delayed ref, also correct the ref_mod on
1929  * head
1930  */
1931 switch (ref->action) {
1932 case BTRFS_ADD_DELAYED_REF:
1933 case BTRFS_ADD_DELAYED_EXTENT:
1934 locked_ref->ref_mod -= ref->ref_mod;
1935 break;
1936 case BTRFS_DROP_DELAYED_REF:
1937 locked_ref->ref_mod += ref->ref_mod;
1938 break;
1939 default:
1940 WARN_ON(1);
1941 }
1942 atomic_dec(&delayed_refs->num_entries);
1943
1944 /*
1945  * Record the must_insert_reserved flag before we drop the
1946  * spin lock.
1947  */
1948 must_insert_reserved = locked_ref->must_insert_reserved;
1949 locked_ref->must_insert_reserved = 0;
1950
1951 extent_op = locked_ref->extent_op;
1952 locked_ref->extent_op = NULL;
1953 spin_unlock(&locked_ref->lock);
1954
1955 ret = run_one_delayed_ref(trans, ref, extent_op,
1956 must_insert_reserved);
1957
1958 btrfs_free_delayed_extent_op(extent_op);
1959 if (ret) {
1960 unselect_delayed_ref_head(delayed_refs, locked_ref);
1961 btrfs_put_delayed_ref(ref);
1962 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
1963 ret);
1964 return ret;
1965 }
1966
1967 btrfs_put_delayed_ref(ref);
1968 cond_resched();
1969
1970 spin_lock(&locked_ref->lock);
1971 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
1972 }
1973
1974 return 0;
1975 }
1976
1977 /*
1978  * Returns 0 on success or if called with an aborted transaction
1979  * Returns -ENOMEM or -EIO on failure and will abort the transaction.
1980  */
1981 static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1982 unsigned long nr)
1983 {
1984 struct btrfs_fs_info *fs_info = trans->fs_info;
1985 struct btrfs_delayed_ref_root *delayed_refs;
1986 struct btrfs_delayed_ref_head *locked_ref = NULL;
1987 ktime_t start = ktime_get();
1988 int ret;
1989 unsigned long count = 0;
1990 unsigned long actual_count = 0;
1991
1992 delayed_refs = &trans->transaction->delayed_refs;
1993 do {
1994 if (!locked_ref) {
1995 locked_ref = btrfs_obtain_ref_head(trans);
1996 if (IS_ERR_OR_NULL(locked_ref)) {
1997 if (PTR_ERR(locked_ref) == -EAGAIN) {
1998 continue;
1999 } else {
2000 break;
2001 }
2002 }
2003 count++;
2004 }
2005
2006 /*
2007  * We need to try and merge add/drops of the same ref since we
2008  * can run into issues with relocate dropping the implicit ref
2009  * and then it being added back again before the drop can
2010  * finish.  If we merged anything we need to re-loop so we can
2011  * get a good ref.
2012  * Or we can get node references of the same type that weren't
2013  * merged when created due to bumps in the tree mod seq, and
2014  * we need to merge them together to keep the sequence checks
2015  * below working.
2016  */
2017 spin_lock(&locked_ref->lock);
2018 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2019
2020 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
2021 &actual_count);
2022 if (ret < 0 && ret != -EAGAIN) {
2023 /*
2024  * Error, btrfs_run_delayed_refs_for_head already
2025  * unlocked everything so just bail out
2026  */
2027 return ret;
2028 } else if (!ret) {
2029 /*
2030  * Success, perform the usual cleanup of a processed
2031  * head
2032  */
2033 ret = cleanup_ref_head(trans, locked_ref);
2034 if (ret > 0) {
2035 /* We dropped our lock, we need to loop. */
2036 ret = 0;
2037 continue;
2038 } else if (ret) {
2039 return ret;
2040 }
2041 }
2042
2043
2044 /*
2045  * Either success case or btrfs_run_delayed_refs_for_head
2046  * returned -EAGAIN, meaning we need to select another head
2047  */
2048 locked_ref = NULL;
2049 cond_resched();
2050 } while ((nr != -1 && count < nr) || locked_ref);
2051
2052 /*
2053  * We don't want to include ref heads since we can have empty ref heads
2054  * and those will drastically skew our runtime down since we just do
2055  * one extra op per ref head.
2056  */
2057 if (actual_count > 0) {
2058 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2059 u64 avg;
2060
2061 /*
2062  * We weigh the current average higher than our current runtime
2063  * to avoid large swings in the average.
2064  */
2065 spin_lock(&delayed_refs->lock);
2066 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2067 fs_info->avg_delayed_ref_runtime = avg >> 2;
2068 spin_unlock(&delayed_refs->lock);
2069 }
2070 return 0;
2071 }
2072
2073 #ifdef SCRAMBLE_DELAYED_REFS
2074 /*
2075  * Normally delayed refs get processed in ascending bytenr order. This
2076  * correlates in most cases to the order added. To expose dependencies on this
2077  * order, we start to process the tree in the middle instead of the beginning
2078  */
2079 static u64 find_middle(struct rb_root *root)
2080 {
2081 struct rb_node *n = root->rb_node;
2082 struct btrfs_delayed_ref_node *entry;
2083 int alt = 1;
2084 u64 middle;
2085 u64 first = 0, last = 0;
2086
2087 n = rb_first(root);
2088 if (n) {
2089 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2090 first = entry->bytenr;
2091 }
2092 n = rb_last(root);
2093 if (n) {
2094 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2095 last = entry->bytenr;
2096 }
2097 n = root->rb_node;
2098
2099 while (n) {
2100 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2101 WARN_ON(!entry->in_tree);
2102
2103 middle = entry->bytenr;
2104
2105 if (alt)
2106 n = n->rb_left;
2107 else
2108 n = n->rb_right;
2109
2110 alt = 1 - alt;
2111 }
2112 return middle;
2113 }
2114 #endif
2115
2116 static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
2117 {
2118 u64 num_bytes;
2119
2120 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2121 sizeof(struct btrfs_extent_inline_ref));
2122 if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2123 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2124
2125 /*
2126  * Convert the byte total into a leaf count, assuming each leaf holds
2127  * BTRFS_LEAF_DATA_SIZE bytes of extent items.
2128  */
2129 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
2130 }
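/*
 * Rough magnitude check for the math above (illustrative figures, assuming
 * a 16KiB nodesize and skinny metadata): each head costs
 * sizeof(btrfs_extent_item) + sizeof(btrfs_extent_inline_ref), i.e. about
 * 24 + 9 = 33 bytes, so a leaf with ~16KiB of item data covers on the
 * order of 490 heads.
 */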
2131
2132 /*
2133  * Takes the number of bytes to be csumm'ed and figures out how many
2134  * leaves it would require to store the csums for that many bytes.
2135  */
2136 u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2137 {
2138 u64 csum_size;
2139 u64 num_csums_per_leaf;
2140 u64 num_csums;
2141
2142 csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
2143 num_csums_per_leaf = div64_u64(csum_size,
2144 (u64)btrfs_super_csum_size(fs_info->super_copy));
2145 num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
2146 num_csums += num_csums_per_leaf - 1;
2147 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2148 return num_csums;
2149 }
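/*
 * Worked example with illustrative figures (nothing here is read from a
 * real super block): a 16KiB nodesize gives a max item size of roughly
 * 16KiB, and 4-byte crc32c checksums then fit about 4000 csums per leaf.
 * Checksumming 1GiB of data at a 4KiB sectorsize needs 262144 csums,
 * which the round-up above turns into 66 leaves.
 */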
2150
2151 /*
2152  * this starts processing the delayed reference count updates and
2153  * extent creations from the delayed refs, or returns if the extent is
2154  * inserted in the extent tree.
2155  *
2156  * count == 0 caps the run at twice the number of currently queued
2157  * entries, count == (unsigned long)-1 keeps running until no delayed
2158  * refs remain, and any other value runs at most that many refs.
2159  * On error the transaction is aborted and a negative errno returned.
2160  */
2161 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2162 unsigned long count)
2163 {
2164 struct btrfs_fs_info *fs_info = trans->fs_info;
2165 struct rb_node *node;
2166 struct btrfs_delayed_ref_root *delayed_refs;
2167 struct btrfs_delayed_ref_head *head;
2168 int ret;
2169 int run_all = count == (unsigned long)-1;
2170
2171 /* We'll clean this up in btrfs_cleanup_transaction */
2172 if (trans->aborted)
2173 return 0;
2174
2175 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
2176 return 0;
2177
2178 delayed_refs = &trans->transaction->delayed_refs;
2179 if (count == 0)
2180 count = atomic_read(&delayed_refs->num_entries) * 2;
2181
2182 again:
2183 #ifdef SCRAMBLE_DELAYED_REFS
2184 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2185 #endif
2186 ret = __btrfs_run_delayed_refs(trans, count);
2187 if (ret < 0) {
2188 btrfs_abort_transaction(trans, ret);
2189 return ret;
2190 }
2191
2192 if (run_all) {
2193 btrfs_create_pending_block_groups(trans);
2194
2195 spin_lock(&delayed_refs->lock);
2196 node = rb_first_cached(&delayed_refs->href_root);
2197 if (!node) {
2198 spin_unlock(&delayed_refs->lock);
2199 goto out;
2200 }
2201 head = rb_entry(node, struct btrfs_delayed_ref_head,
2202 href_node);
2203 refcount_inc(&head->refs);
2204 spin_unlock(&delayed_refs->lock);
2205
2206 /* Mutex was contended, block until it's released and retry. */
2207 mutex_lock(&head->mutex);
2208 mutex_unlock(&head->mutex);
2209
2210 btrfs_put_delayed_ref_head(head);
2211 cond_resched();
2212 goto again;
2213 }
2214 out:
2215 return 0;
2216 }
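/*
 * A hedged usage sketch (not a real btrfs caller): count selects between
 * a bounded batch, a "process what is queued now" run (count == 0), and
 * a full flush that loops until no heads remain ((unsigned long)-1, the
 * run_all path above).
 */
static int flush_delayed_refs_sketch(struct btrfs_trans_handle *trans)
{
	int ret;

	ret = btrfs_run_delayed_refs(trans, 64);	/* bounded batch */
	if (ret)
		return ret;
	/* Flush everything, including heads queued while we ran. */
	return btrfs_run_delayed_refs(trans, (unsigned long)-1);
}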
2217
2218 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2219 u64 bytenr, u64 num_bytes, u64 flags,
2220 int level, int is_data)
2221 {
2222 struct btrfs_delayed_extent_op *extent_op;
2223 int ret;
2224
2225 extent_op = btrfs_alloc_delayed_extent_op();
2226 if (!extent_op)
2227 return -ENOMEM;
2228
2229 extent_op->flags_to_set = flags;
2230 extent_op->update_flags = true;
2231 extent_op->update_key = false;
2232 extent_op->is_data = is_data ? true : false;
2233 extent_op->level = level;
2234
2235 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
2236 if (ret)
2237 btrfs_free_delayed_extent_op(extent_op);
2238 return ret;
2239 }
2240
2241 static noinline int check_delayed_ref(struct btrfs_root *root,
2242 struct btrfs_path *path,
2243 u64 objectid, u64 offset, u64 bytenr)
2244 {
2245 struct btrfs_delayed_ref_head *head;
2246 struct btrfs_delayed_ref_node *ref;
2247 struct btrfs_delayed_data_ref *data_ref;
2248 struct btrfs_delayed_ref_root *delayed_refs;
2249 struct btrfs_transaction *cur_trans;
2250 struct rb_node *node;
2251 int ret = 0;
2252
2253 spin_lock(&root->fs_info->trans_lock);
2254 cur_trans = root->fs_info->running_transaction;
2255 if (cur_trans)
2256 refcount_inc(&cur_trans->use_count);
2257 spin_unlock(&root->fs_info->trans_lock);
2258 if (!cur_trans)
2259 return 0;
2260
2261 delayed_refs = &cur_trans->delayed_refs;
2262 spin_lock(&delayed_refs->lock);
2263 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
2264 if (!head) {
2265 spin_unlock(&delayed_refs->lock);
2266 btrfs_put_transaction(cur_trans);
2267 return 0;
2268 }
2269
2270 if (!mutex_trylock(&head->mutex)) {
2271 refcount_inc(&head->refs);
2272 spin_unlock(&delayed_refs->lock);
2273
2274 btrfs_release_path(path);
2275
2276 /*
2277  * Mutex was contended, block until it's released and let the
2278  * caller try again.
2279  */
2280 mutex_lock(&head->mutex);
2281 mutex_unlock(&head->mutex);
2282 btrfs_put_delayed_ref_head(head);
2283 btrfs_put_transaction(cur_trans);
2284 return -EAGAIN;
2285 }
2286 spin_unlock(&delayed_refs->lock);
2287
2288 spin_lock(&head->lock);
2289 /*
2290  * XXX: We should replace this with a proper search function in
2291  * the future.
2292  */
2293 for (node = rb_first_cached(&head->ref_tree); node;
2294 node = rb_next(node)) {
2295 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
2296 /* If it's a shared ref we know a cross reference exists. */
2297 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2298 ret = 1;
2299 break;
2300 }
2301
2302 data_ref = btrfs_delayed_node_to_data_ref(ref);
2303
2304 /*
2305  * If our ref doesn't match the one we're currently looking at,
2306  * then we have a cross reference.
2307  */
2308 if (data_ref->root != root->root_key.objectid ||
2309 data_ref->objectid != objectid ||
2310 data_ref->offset != offset) {
2311 ret = 1;
2312 break;
2313 }
2314 }
2315 spin_unlock(&head->lock);
2316 mutex_unlock(&head->mutex);
2317 btrfs_put_transaction(cur_trans);
2318 return ret;
2319 }
2320
2321 static noinline int check_committed_ref(struct btrfs_root *root,
2322 struct btrfs_path *path,
2323 u64 objectid, u64 offset, u64 bytenr)
2324 {
2325 struct btrfs_fs_info *fs_info = root->fs_info;
2326 struct btrfs_root *extent_root = fs_info->extent_root;
2327 struct extent_buffer *leaf;
2328 struct btrfs_extent_data_ref *ref;
2329 struct btrfs_extent_inline_ref *iref;
2330 struct btrfs_extent_item *ei;
2331 struct btrfs_key key;
2332 u32 item_size;
2333 int type;
2334 int ret;
2335
2336 key.objectid = bytenr;
2337 key.offset = (u64)-1;
2338 key.type = BTRFS_EXTENT_ITEM_KEY;
2339
2340 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2341 if (ret < 0)
2342 goto out;
2343 BUG_ON(ret == 0); /* Corruption */
2344
2345 ret = -ENOENT;
2346 if (path->slots[0] == 0)
2347 goto out;
2348
2349 path->slots[0]--;
2350 leaf = path->nodes[0];
2351 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2352
2353 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2354 goto out;
2355
2356 ret = 1;
2357 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2358 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2359
2360 /* If the extent item has more than one inline ref, it's shared. */
2361 if (item_size != sizeof(*ei) +
2362 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2363 goto out;
2364
2365 /* If the extent predates the last snapshot, it must be shared. */
2366 if (btrfs_extent_generation(leaf, ei) <=
2367 btrfs_root_last_snapshot(&root->root_item))
2368 goto out;
2369
2370 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2371
2372 /* Anything other than a plain EXTENT_DATA_REF means it's shared. */
2373 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
2374 if (type != BTRFS_EXTENT_DATA_REF_KEY)
2375 goto out;
2376
2377 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2378 if (btrfs_extent_refs(leaf, ei) !=
2379 btrfs_extent_data_ref_count(leaf, ref) ||
2380 btrfs_extent_data_ref_root(leaf, ref) !=
2381 root->root_key.objectid ||
2382 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2383 btrfs_extent_data_ref_offset(leaf, ref) != offset)
2384 goto out;
2385
2386 ret = 0;
2387 out:
2388 return ret;
2389 }
2390
2391 int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
2392 u64 bytenr)
2393 {
2394 struct btrfs_path *path;
2395 int ret;
2396
2397 path = btrfs_alloc_path();
2398 if (!path)
2399 return -ENOMEM;
2400
2401 do {
2402 ret = check_committed_ref(root, path, objectid,
2403 offset, bytenr);
2404 if (ret && ret != -ENOENT)
2405 goto out;
2406
2407 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
2408 } while (ret == -EAGAIN);
2409
2410 out:
2411 btrfs_free_path(path);
2412 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2413 WARN_ON(ret > 0);
2414 return ret;
2415 }
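/*
 * Usage sketch (hypothetical caller, not from this file): the nodatacow
 * write path uses this answer to decide whether an extent may be
 * overwritten in place. 0 means no other reference was found in either
 * the committed extent tree or the delayed-ref queue; anything else
 * (a cross reference, or an error) conservatively forces COW.
 */
static bool can_overwrite_in_place(struct btrfs_root *root, u64 ino,
				   u64 offset, u64 disk_bytenr)
{
	return btrfs_cross_ref_exist(root, ino, offset, disk_bytenr) == 0;
}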
2416
2417 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2418 struct btrfs_root *root,
2419 struct extent_buffer *buf,
2420 int full_backref, int inc)
2421 {
2422 struct btrfs_fs_info *fs_info = root->fs_info;
2423 u64 bytenr;
2424 u64 num_bytes;
2425 u64 parent;
2426 u64 ref_root;
2427 u32 nritems;
2428 struct btrfs_key key;
2429 struct btrfs_file_extent_item *fi;
2430 struct btrfs_ref generic_ref = { 0 };
2431 bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
2432 int i;
2433 int action;
2434 int level;
2435 int ret = 0;
2436
2437 if (btrfs_is_testing(fs_info))
2438 return 0;
2439
2440 ref_root = btrfs_header_owner(buf);
2441 nritems = btrfs_header_nritems(buf);
2442 level = btrfs_header_level(buf);
2443
2444 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
2445 return 0;
2446
2447 if (full_backref)
2448 parent = buf->start;
2449 else
2450 parent = 0;
2451 if (inc)
2452 action = BTRFS_ADD_DELAYED_REF;
2453 else
2454 action = BTRFS_DROP_DELAYED_REF;
2455
2456 for (i = 0; i < nritems; i++) {
2457 if (level == 0) {
2458 btrfs_item_key_to_cpu(buf, &key, i);
2459 if (key.type != BTRFS_EXTENT_DATA_KEY)
2460 continue;
2461 fi = btrfs_item_ptr(buf, i,
2462 struct btrfs_file_extent_item);
2463 if (btrfs_file_extent_type(buf, fi) ==
2464 BTRFS_FILE_EXTENT_INLINE)
2465 continue;
2466 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2467 if (bytenr == 0)
2468 continue;
2469
2470 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
2471 key.offset -= btrfs_file_extent_offset(buf, fi);
2472 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2473 num_bytes, parent);
2474 generic_ref.real_root = root->root_key.objectid;
2475 btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
2476 key.offset);
2477 generic_ref.skip_qgroup = for_reloc;
2478 if (inc)
2479 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2480 else
2481 ret = btrfs_free_extent(trans, &generic_ref);
2482 if (ret)
2483 goto fail;
2484 } else {
2485 bytenr = btrfs_node_blockptr(buf, i);
2486 num_bytes = fs_info->nodesize;
2487 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2488 num_bytes, parent);
2489 generic_ref.real_root = root->root_key.objectid;
2490 btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
2491 generic_ref.skip_qgroup = for_reloc;
2492 if (inc)
2493 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2494 else
2495 ret = btrfs_free_extent(trans, &generic_ref);
2496 if (ret)
2497 goto fail;
2498 }
2499 }
2500 return 0;
2501 fail:
2502 return ret;
2503 }
2504
2505 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2506 struct extent_buffer *buf, int full_backref)
2507 {
2508 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
2509 }
2510
2511 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2512 struct extent_buffer *buf, int full_backref)
2513 {
2514 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
2515 }
2516
2517 int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
2518 {
2519 struct btrfs_block_group_cache *block_group;
2520 int readonly = 0;
2521
2522 block_group = btrfs_lookup_block_group(fs_info, bytenr);
2523 if (!block_group || block_group->ro)
2524 readonly = 1;
2525 if (block_group)
2526 btrfs_put_block_group(block_group);
2527 return readonly;
2528 }
2529
2530 static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
2531 {
2532 struct btrfs_fs_info *fs_info = root->fs_info;
2533 u64 flags;
2534 u64 ret;
2535
2536 if (data)
2537 flags = BTRFS_BLOCK_GROUP_DATA;
2538 else if (root == fs_info->chunk_root)
2539 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2540 else
2541 flags = BTRFS_BLOCK_GROUP_METADATA;
2542
2543 ret = btrfs_get_alloc_profile(fs_info, flags);
2544 return ret;
2545 }
2546
2547 static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
2548 {
2549 struct btrfs_block_group_cache *cache;
2550 u64 bytenr;
2551
2552 spin_lock(&fs_info->block_group_cache_lock);
2553 bytenr = fs_info->first_logical_byte;
2554 spin_unlock(&fs_info->block_group_cache_lock);
2555
2556 if (bytenr < (u64)-1)
2557 return bytenr;
2558
2559 cache = btrfs_lookup_first_block_group(fs_info, search_start);
2560 if (!cache)
2561 return 0;
2562
2563 bytenr = cache->key.objectid;
2564 btrfs_put_block_group(cache);
2565
2566 return bytenr;
2567 }
2568
2569 static int pin_down_extent(struct btrfs_block_group_cache *cache,
2570 u64 bytenr, u64 num_bytes, int reserved)
2571 {
2572 struct btrfs_fs_info *fs_info = cache->fs_info;
2573
2574 spin_lock(&cache->space_info->lock);
2575 spin_lock(&cache->lock);
2576 cache->pinned += num_bytes;
2577 btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
2578 num_bytes);
2579 if (reserved) {
2580 cache->reserved -= num_bytes;
2581 cache->space_info->bytes_reserved -= num_bytes;
2582 }
2583 spin_unlock(&cache->lock);
2584 spin_unlock(&cache->space_info->lock);
2585
2586 percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
2587 num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
2588 set_extent_dirty(fs_info->pinned_extents, bytenr,
2589 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
2590 return 0;
2591 }
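/*
 * Accounting sketch for pin_down_extent(): bytes move from "reserved"
 * (if they were reserved) into "pinned", and only return to the free
 * space pool once the transaction that freed them commits. The struct
 * and helper below are illustrative, not part of btrfs.
 */
struct pinned_accounting_sketch { u64 pinned; u64 reserved; };

static void pin_move_sketch(struct pinned_accounting_sketch *c, u64 n,
			    int was_reserved)
{
	c->pinned += n;
	if (was_reserved)
		c->reserved -= n;
}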
2592
2593 /*
2594  * This function must be called within a transaction.
2595  */
2596 int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
2597 u64 bytenr, u64 num_bytes, int reserved)
2598 {
2599 struct btrfs_block_group_cache *cache;
2600
2601 cache = btrfs_lookup_block_group(fs_info, bytenr);
2602 BUG_ON(!cache); /* Logic error */
2603
2604 pin_down_extent(cache, bytenr, num_bytes, reserved);
2605
2606 btrfs_put_block_group(cache);
2607 return 0;
2608 }
2609
2610 /*
2611  * This function must be called within a transaction.
2612  */
2613 int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
2614 u64 bytenr, u64 num_bytes)
2615 {
2616 struct btrfs_block_group_cache *cache;
2617 int ret;
2618
2619 cache = btrfs_lookup_block_group(fs_info, bytenr);
2620 if (!cache)
2621 return -EINVAL;
2622
2623 /*
2624  * Pull in the free space cache (if any) so that our pin removes
2625  * the free space from the cache.  We have load_only set to one
2626  * because the slow code to read in the free extents does check
2627  * the pinned extents.
2628  */
2629 btrfs_cache_block_group(cache, 1);
2630
2631 pin_down_extent(cache, bytenr, num_bytes, 0);
2632
2633 /* Remove us from the free space cache (if we're there at all). */
2634 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
2635 btrfs_put_block_group(cache);
2636 return ret;
2637 }
2638
2639 static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
2640 u64 start, u64 num_bytes)
2641 {
2642 int ret;
2643 struct btrfs_block_group_cache *block_group;
2644 struct btrfs_caching_control *caching_ctl;
2645
2646 block_group = btrfs_lookup_block_group(fs_info, start);
2647 if (!block_group)
2648 return -EINVAL;
2649
2650 btrfs_cache_block_group(block_group, 0);
2651 caching_ctl = btrfs_get_caching_control(block_group);
2652
2653 if (!caching_ctl) {
2654 /* No caching control: caching must already be done (else logic error). */
2655 BUG_ON(!btrfs_block_group_cache_done(block_group));
2656 ret = btrfs_remove_free_space(block_group, start, num_bytes);
2657 } else {
2658 mutex_lock(&caching_ctl->mutex);
2659
2660 if (start >= caching_ctl->progress) {
2661 ret = btrfs_add_excluded_extent(fs_info, start,
2662 num_bytes);
2663 } else if (start + num_bytes <= caching_ctl->progress) {
2664 ret = btrfs_remove_free_space(block_group,
2665 start, num_bytes);
2666 } else {
2667 num_bytes = caching_ctl->progress - start;
2668 ret = btrfs_remove_free_space(block_group,
2669 start, num_bytes);
2670 if (ret)
2671 goto out_lock;
2672
2673 num_bytes = (start + num_bytes) -
2674 caching_ctl->progress;
2675 start = caching_ctl->progress;
2676 ret = btrfs_add_excluded_extent(fs_info, start,
2677 num_bytes);
2678 }
2679 out_lock:
2680 mutex_unlock(&caching_ctl->mutex);
2681 btrfs_put_caching_control(caching_ctl);
2682 }
2683 btrfs_put_block_group(block_group);
2684 return ret;
2685 }
2686
2687 int btrfs_exclude_logged_extents(struct extent_buffer *eb)
2688 {
2689 struct btrfs_fs_info *fs_info = eb->fs_info;
2690 struct btrfs_file_extent_item *item;
2691 struct btrfs_key key;
2692 int found_type;
2693 int i;
2694 int ret = 0;
2695
2696 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
2697 return 0;
2698
2699 for (i = 0; i < btrfs_header_nritems(eb); i++) {
2700 btrfs_item_key_to_cpu(eb, &key, i);
2701 if (key.type != BTRFS_EXTENT_DATA_KEY)
2702 continue;
2703 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
2704 found_type = btrfs_file_extent_type(eb, item);
2705 if (found_type == BTRFS_FILE_EXTENT_INLINE)
2706 continue;
2707 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
2708 continue;
2709 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
2710 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
2711 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
2712 if (ret)
2713 break;
2714 }
2715
2716 return ret;
2717 }
2718
2719 static void
2720 btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
2721 {
2722 atomic_inc(&bg->reservations);
2723 }
2724
2725 void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
2726 {
2727 struct btrfs_caching_control *next;
2728 struct btrfs_caching_control *caching_ctl;
2729 struct btrfs_block_group_cache *cache;
2730
2731 down_write(&fs_info->commit_root_sem);
2732
2733 list_for_each_entry_safe(caching_ctl, next,
2734 &fs_info->caching_block_groups, list) {
2735 cache = caching_ctl->block_group;
2736 if (btrfs_block_group_cache_done(cache)) {
2737 cache->last_byte_to_unpin = (u64)-1;
2738 list_del_init(&caching_ctl->list);
2739 btrfs_put_caching_control(caching_ctl);
2740 } else {
2741 cache->last_byte_to_unpin = caching_ctl->progress;
2742 }
2743 }
2744
2745 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
2746 fs_info->pinned_extents = &fs_info->freed_extents[1];
2747 else
2748 fs_info->pinned_extents = &fs_info->freed_extents[0];
2749
2750 up_write(&fs_info->commit_root_sem);
2751
2752 btrfs_update_global_block_rsv(fs_info);
2753 }
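/*
 * The swap above double-buffers the pinned-extent trees: while the
 * committing transaction drains freed_extents[N] in
 * btrfs_finish_extent_commit(), new pins accumulate in
 * freed_extents[1 - N]. A one-line sketch of the flip (illustrative
 * helper name):
 */
static inline int other_pinned_index_sketch(int cur) { return 1 - cur; }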
2754
2755 /*
2756  * Returns the free cluster for the given space info and sets
2757  * empty_cluster to what it should be based on the mount options.
2758  */
2759 static struct btrfs_free_cluster *
2760 fetch_cluster_info(struct btrfs_fs_info *fs_info,
2761 struct btrfs_space_info *space_info, u64 *empty_cluster)
2762 {
2763 struct btrfs_free_cluster *ret = NULL;
2764
2765 *empty_cluster = 0;
2766 if (btrfs_mixed_space_info(space_info))
2767 return ret;
2768
2769 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
2770 ret = &fs_info->meta_alloc_cluster;
2771 if (btrfs_test_opt(fs_info, SSD))
2772 *empty_cluster = SZ_2M;
2773 else
2774 *empty_cluster = SZ_64K;
2775 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
2776 btrfs_test_opt(fs_info, SSD_SPREAD)) {
2777 *empty_cluster = SZ_2M;
2778 ret = &fs_info->data_alloc_cluster;
2779 }
2780
2781 return ret;
2782 }
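/*
 * The empty-cluster sizes chosen above, restated as a sketch (helper is
 * illustrative; SZ_2M/SZ_64K are the kernel size constants). Mixed block
 * groups get no cluster at all.
 */
static u64 empty_cluster_sketch(bool metadata, bool ssd, bool ssd_spread)
{
	if (metadata)
		return ssd ? SZ_2M : SZ_64K;	/* meta_alloc_cluster */
	return ssd_spread ? SZ_2M : 0;		/* data_alloc_cluster */
}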
2783
2784 static int unpin_extent_range(struct btrfs_fs_info *fs_info,
2785 u64 start, u64 end,
2786 const bool return_free_space)
2787 {
2788 struct btrfs_block_group_cache *cache = NULL;
2789 struct btrfs_space_info *space_info;
2790 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
2791 struct btrfs_free_cluster *cluster = NULL;
2792 u64 len;
2793 u64 total_unpinned = 0;
2794 u64 empty_cluster = 0;
2795 bool readonly;
2796
2797 while (start <= end) {
2798 readonly = false;
2799 if (!cache ||
2800 start >= cache->key.objectid + cache->key.offset) {
2801 if (cache)
2802 btrfs_put_block_group(cache);
2803 total_unpinned = 0;
2804 cache = btrfs_lookup_block_group(fs_info, start);
2805 BUG_ON(!cache); /* Logic error */
2806
2807 cluster = fetch_cluster_info(fs_info,
2808 cache->space_info,
2809 &empty_cluster);
2810 empty_cluster <<= 1;
2811 }
2812
2813 len = cache->key.objectid + cache->key.offset - start;
2814 len = min(len, end + 1 - start);
2815
2816 if (start < cache->last_byte_to_unpin) {
2817 len = min(len, cache->last_byte_to_unpin - start);
2818 if (return_free_space)
2819 btrfs_add_free_space(cache, start, len);
2820 }
2821
2822 start += len;
2823 total_unpinned += len;
2824 space_info = cache->space_info;
2825
2826 /*
2827  * If this space cluster has been marked as fragmented and we've
2828  * unpinned enough in this block group to potentially allow a
2829  * cluster to be created inside of it, go ahead and clear the
2830  * fragmented flag.
2831  */
2832 if (cluster && cluster->fragmented &&
2833 total_unpinned > empty_cluster) {
2834 spin_lock(&cluster->lock);
2835 cluster->fragmented = 0;
2836 spin_unlock(&cluster->lock);
2837 }
2838
2839 spin_lock(&space_info->lock);
2840 spin_lock(&cache->lock);
2841 cache->pinned -= len;
2842 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
2843 space_info->max_extent_size = 0;
2844 percpu_counter_add_batch(&space_info->total_bytes_pinned,
2845 -len, BTRFS_TOTAL_BYTES_PINNED_BATCH);
2846 if (cache->ro) {
2847 space_info->bytes_readonly += len;
2848 readonly = true;
2849 }
2850 spin_unlock(&cache->lock);
2851 if (!readonly && return_free_space &&
2852 global_rsv->space_info == space_info) {
2853 u64 to_add = len;
2854
2855 spin_lock(&global_rsv->lock);
2856 if (!global_rsv->full) {
2857 to_add = min(len, global_rsv->size -
2858 global_rsv->reserved);
2859 global_rsv->reserved += to_add;
2860 btrfs_space_info_update_bytes_may_use(fs_info,
2861 space_info, to_add);
2862 if (global_rsv->reserved >= global_rsv->size)
2863 global_rsv->full = 1;
2864 len -= to_add;
2865 }
2866 spin_unlock(&global_rsv->lock);
2867
2868 if (len)
2869 btrfs_try_granting_tickets(fs_info,
2870 space_info);
2871 }
2872 spin_unlock(&space_info->lock);
2873 }
2874
2875 if (cache)
2876 btrfs_put_block_group(cache);
2877 return 0;
2878 }
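/*
 * A sketch of the global-reserve refill above: unpinned bytes top up the
 * global block reserve first, and only the remainder is offered to
 * waiting tickets. Illustrative helper, not part of btrfs:
 */
static u64 refill_rsv_sketch(u64 len, u64 rsv_size, u64 *rsv_reserved)
{
	u64 to_add = len;

	if (*rsv_reserved < rsv_size) {
		if (rsv_size - *rsv_reserved < to_add)
			to_add = rsv_size - *rsv_reserved;	/* min() */
		*rsv_reserved += to_add;
		len -= to_add;
	}
	return len;	/* remainder can be granted to tickets */
}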
2879
2880 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
2881 {
2882 struct btrfs_fs_info *fs_info = trans->fs_info;
2883 struct btrfs_block_group_cache *block_group, *tmp;
2884 struct list_head *deleted_bgs;
2885 struct extent_io_tree *unpin;
2886 u64 start;
2887 u64 end;
2888 int ret;
2889
2890 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
2891 unpin = &fs_info->freed_extents[1];
2892 else
2893 unpin = &fs_info->freed_extents[0];
2894
2895 while (!trans->aborted) {
2896 struct extent_state *cached_state = NULL;
2897
2898 mutex_lock(&fs_info->unused_bg_unpin_mutex);
2899 ret = find_first_extent_bit(unpin, 0, &start, &end,
2900 EXTENT_DIRTY, &cached_state);
2901 if (ret) {
2902 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2903 break;
2904 }
2905
2906 if (btrfs_test_opt(fs_info, DISCARD))
2907 ret = btrfs_discard_extent(fs_info, start,
2908 end + 1 - start, NULL);
2909
2910 clear_extent_dirty(unpin, start, end, &cached_state);
2911 unpin_extent_range(fs_info, start, end, true);
2912 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2913 free_extent_state(cached_state);
2914 cond_resched();
2915 }
2916
2917 /*
2918  * Transaction is finished.  We don't need the lock anymore.  We
2919  * do need to clean up the block groups in case of a transaction
2920  * abort.
2921  */
2922 deleted_bgs = &trans->transaction->deleted_bgs;
2923 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
2924 u64 trimmed = 0;
2925
2926 ret = -EROFS;
2927 if (!trans->aborted)
2928 ret = btrfs_discard_extent(fs_info,
2929 block_group->key.objectid,
2930 block_group->key.offset,
2931 &trimmed);
2932
2933 list_del_init(&block_group->bg_list);
2934 btrfs_put_block_group_trimming(block_group);
2935 btrfs_put_block_group(block_group);
2936
2937 if (ret) {
2938 const char *errstr = btrfs_decode_error(ret);
2939 btrfs_warn(fs_info,
2940 "discard failed while removing blockgroup: errno=%d %s",
2941 ret, errstr);
2942 }
2943 }
2944
2945 return 0;
2946 }
2947
2948 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2949 struct btrfs_delayed_ref_node *node, u64 parent,
2950 u64 root_objectid, u64 owner_objectid,
2951 u64 owner_offset, int refs_to_drop,
2952 struct btrfs_delayed_extent_op *extent_op)
2953 {
2954 struct btrfs_fs_info *info = trans->fs_info;
2955 struct btrfs_key key;
2956 struct btrfs_path *path;
2957 struct btrfs_root *extent_root = info->extent_root;
2958 struct extent_buffer *leaf;
2959 struct btrfs_extent_item *ei;
2960 struct btrfs_extent_inline_ref *iref;
2961 int ret;
2962 int is_data;
2963 int extent_slot = 0;
2964 int found_extent = 0;
2965 int num_to_del = 1;
2966 u32 item_size;
2967 u64 refs;
2968 u64 bytenr = node->bytenr;
2969 u64 num_bytes = node->num_bytes;
2970 int last_ref = 0;
2971 bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
2972
2973 path = btrfs_alloc_path();
2974 if (!path)
2975 return -ENOMEM;
2976
2977 path->reada = READA_FORWARD;
2978 path->leave_spinning = 1;
2979
2980 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
2981 BUG_ON(!is_data && refs_to_drop != 1);
2982
2983 if (is_data)
2984 skinny_metadata = false;
2985
2986 ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
2987 parent, root_objectid, owner_objectid,
2988 owner_offset);
2989 if (ret == 0) {
2990 extent_slot = path->slots[0];
2991 while (extent_slot >= 0) {
2992 btrfs_item_key_to_cpu(path->nodes[0], &key,
2993 extent_slot);
2994 if (key.objectid != bytenr)
2995 break;
2996 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
2997 key.offset == num_bytes) {
2998 found_extent = 1;
2999 break;
3000 }
3001 if (key.type == BTRFS_METADATA_ITEM_KEY &&
3002 key.offset == owner_objectid) {
3003 found_extent = 1;
3004 break;
3005 }
3006 if (path->slots[0] - extent_slot > 5)
3007 break;
3008 extent_slot--;
3009 }
3010
3011 if (!found_extent) {
3012 BUG_ON(iref);
3013 ret = remove_extent_backref(trans, path, NULL,
3014 refs_to_drop,
3015 is_data, &last_ref);
3016 if (ret) {
3017 btrfs_abort_transaction(trans, ret);
3018 goto out;
3019 }
3020 btrfs_release_path(path);
3021 path->leave_spinning = 1;
3022
3023 key.objectid = bytenr;
3024 key.type = BTRFS_EXTENT_ITEM_KEY;
3025 key.offset = num_bytes;
3026
3027 if (!is_data && skinny_metadata) {
3028 key.type = BTRFS_METADATA_ITEM_KEY;
3029 key.offset = owner_objectid;
3030 }
3031
3032 ret = btrfs_search_slot(trans, extent_root,
3033 &key, path, -1, 1);
3034 if (ret > 0 && skinny_metadata && path->slots[0]) {
3035 /*
3036  * Couldn't find our skinny metadata item; see if we have
3037  * the old-style extent item instead.
3038  */
3039 path->slots[0]--;
3040 btrfs_item_key_to_cpu(path->nodes[0], &key,
3041 path->slots[0]);
3042 if (key.objectid == bytenr &&
3043 key.type == BTRFS_EXTENT_ITEM_KEY &&
3044 key.offset == num_bytes)
3045 ret = 0;
3046 }
3047
3048 if (ret > 0 && skinny_metadata) {
3049 skinny_metadata = false;
3050 key.objectid = bytenr;
3051 key.type = BTRFS_EXTENT_ITEM_KEY;
3052 key.offset = num_bytes;
3053 btrfs_release_path(path);
3054 ret = btrfs_search_slot(trans, extent_root,
3055 &key, path, -1, 1);
3056 }
3057
3058 if (ret) {
3059 btrfs_err(info,
3060 "umm, got %d back from search, was looking for %llu",
3061 ret, bytenr);
3062 if (ret > 0)
3063 btrfs_print_leaf(path->nodes[0]);
3064 }
3065 if (ret < 0) {
3066 btrfs_abort_transaction(trans, ret);
3067 goto out;
3068 }
3069 extent_slot = path->slots[0];
3070 }
3071 } else if (WARN_ON(ret == -ENOENT)) {
3072 btrfs_print_leaf(path->nodes[0]);
3073 btrfs_err(info,
3074 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
3075 bytenr, parent, root_objectid, owner_objectid,
3076 owner_offset);
3077 btrfs_abort_transaction(trans, ret);
3078 goto out;
3079 } else {
3080 btrfs_abort_transaction(trans, ret);
3081 goto out;
3082 }
3083
3084 leaf = path->nodes[0];
3085 item_size = btrfs_item_size_nr(leaf, extent_slot);
3086 if (unlikely(item_size < sizeof(*ei))) {
3087 ret = -EINVAL;
3088 btrfs_print_v0_err(info);
3089 btrfs_abort_transaction(trans, ret);
3090 goto out;
3091 }
3092 ei = btrfs_item_ptr(leaf, extent_slot,
3093 struct btrfs_extent_item);
3094 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
3095 key.type == BTRFS_EXTENT_ITEM_KEY) {
3096 struct btrfs_tree_block_info *bi;
3097 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
3098 bi = (struct btrfs_tree_block_info *)(ei + 1);
3099 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
3100 }
3101
3102 refs = btrfs_extent_refs(leaf, ei);
3103 if (refs < refs_to_drop) {
3104 btrfs_err(info,
3105 "trying to drop %d refs but we only have %Lu for bytenr %Lu",
3106 refs_to_drop, refs, bytenr);
3107 ret = -EINVAL;
3108 btrfs_abort_transaction(trans, ret);
3109 goto out;
3110 }
3111 refs -= refs_to_drop;
3112
3113 if (refs > 0) {
3114 if (extent_op)
3115 __run_delayed_extent_op(extent_op, leaf, ei);
3116
3117 /*
3118  * For inline back refs, remove_extent_backref updates the count.
3119  */
3120 if (iref) {
3121 BUG_ON(!found_extent);
3122 } else {
3123 btrfs_set_extent_refs(leaf, ei, refs);
3124 btrfs_mark_buffer_dirty(leaf);
3125 }
3126 if (found_extent) {
3127 ret = remove_extent_backref(trans, path, iref,
3128 refs_to_drop, is_data,
3129 &last_ref);
3130 if (ret) {
3131 btrfs_abort_transaction(trans, ret);
3132 goto out;
3133 }
3134 }
3135 } else {
3136 if (found_extent) {
3137 BUG_ON(is_data && refs_to_drop !=
3138 extent_data_ref_count(path, iref));
3139 if (iref) {
3140 BUG_ON(path->slots[0] != extent_slot);
3141 } else {
3142 BUG_ON(path->slots[0] != extent_slot + 1);
3143 path->slots[0] = extent_slot;
3144 num_to_del = 2;
3145 }
3146 }
3147
3148 last_ref = 1;
3149 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
3150 num_to_del);
3151 if (ret) {
3152 btrfs_abort_transaction(trans, ret);
3153 goto out;
3154 }
3155 btrfs_release_path(path);
3156
3157 if (is_data) {
3158 ret = btrfs_del_csums(trans, info->csum_root, bytenr,
3159 num_bytes);
3160 if (ret) {
3161 btrfs_abort_transaction(trans, ret);
3162 goto out;
3163 }
3164 }
3165
3166 ret = add_to_free_space_tree(trans, bytenr, num_bytes);
3167 if (ret) {
3168 btrfs_abort_transaction(trans, ret);
3169 goto out;
3170 }
3171
3172 ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0);
3173 if (ret) {
3174 btrfs_abort_transaction(trans, ret);
3175 goto out;
3176 }
3177 }
3178 btrfs_release_path(path);
3179
3180 out:
3181 btrfs_free_path(path);
3182 return ret;
3183 }
3184
3185 /*
3186  * When we free an extent buffer, it is likely that we free the
3187  * last delayed ref for that extent as well.  This searches the
3188  * delayed ref tree for the given extent and, if there are no
3189  * other delayed refs to process, removes it from the tree.
3190  */
3191 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
3192 u64 bytenr)
3193 {
3194 struct btrfs_delayed_ref_head *head;
3195 struct btrfs_delayed_ref_root *delayed_refs;
3196 int ret = 0;
3197
3198 delayed_refs = &trans->transaction->delayed_refs;
3199 spin_lock(&delayed_refs->lock);
3200 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
3201 if (!head)
3202 goto out_delayed_unlock;
3203
3204 spin_lock(&head->lock);
3205 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
3206 goto out;
3207
3208 if (cleanup_extent_op(head) != NULL)
3209 goto out;
3210
3211 /*
3212  * Waiting for the lock here would deadlock.  If someone else has
3213  * it locked, they are already in the process of dropping it anyway.
3214  */
3215 if (!mutex_trylock(&head->mutex))
3216 goto out;
3217
3218 btrfs_delete_ref_head(delayed_refs, head);
3219 head->processing = 0;
3220
3221 spin_unlock(&head->lock);
3222 spin_unlock(&delayed_refs->lock);
3223
3224 BUG_ON(head->extent_op);
3225 if (head->must_insert_reserved)
3226 ret = 1;
3227
3228 btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
3229 mutex_unlock(&head->mutex);
3230 btrfs_put_delayed_ref_head(head);
3231 return ret;
3232 out:
3233 spin_unlock(&head->lock);
3234
3235 out_delayed_unlock:
3236 spin_unlock(&delayed_refs->lock);
3237 return 0;
3238 }
3239
3240 void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
3241 struct btrfs_root *root,
3242 struct extent_buffer *buf,
3243 u64 parent, int last_ref)
3244 {
3245 struct btrfs_fs_info *fs_info = root->fs_info;
3246 struct btrfs_ref generic_ref = { 0 };
3247 int pin = 1;
3248 int ret;
3249
3250 btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
3251 buf->start, buf->len, parent);
3252 btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
3253 root->root_key.objectid);
3254
3255 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3256 int old_ref_mod, new_ref_mod;
3257
3258 btrfs_ref_tree_mod(fs_info, &generic_ref);
3259 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL,
3260 &old_ref_mod, &new_ref_mod);
3261 BUG_ON(ret); /* -ENOMEM */
3262 pin = old_ref_mod >= 0 && new_ref_mod < 0;
3263 }
3264
3265 if (last_ref && btrfs_header_generation(buf) == trans->transid) {
3266 struct btrfs_block_group_cache *cache;
3267
3268 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3269 ret = check_ref_cleanup(trans, buf->start);
3270 if (!ret)
3271 goto out;
3272 }
3273
3274 pin = 0;
3275 cache = btrfs_lookup_block_group(fs_info, buf->start);
3276
3277 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
3278 pin_down_extent(cache, buf->start, buf->len, 1);
3279 btrfs_put_block_group(cache);
3280 goto out;
3281 }
3282
3283 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
3284
3285 btrfs_add_free_space(cache, buf->start, buf->len);
3286 btrfs_free_reserved_bytes(cache, buf->len, 0);
3287 btrfs_put_block_group(cache);
3288 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
3289 }
3290 out:
3291 if (pin)
3292 add_pinned_bytes(fs_info, &generic_ref);
3293
3294 if (last_ref) {
3295 /*
3296  * Deleting the buffer, clear the corrupt flag since it doesn't
3297  * matter anymore.
3298  */
3299 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
3300 }
3301 }
3302
3303 /* Can return -ENOMEM. */
3304 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
3305 {
3306 struct btrfs_fs_info *fs_info = trans->fs_info;
3307 int old_ref_mod, new_ref_mod;
3308 int ret;
3309
3310 if (btrfs_is_testing(fs_info))
3311 return 0;
3312
3313 /*
3314  * Tree log blocks never actually go into the extent allocation
3315  * tree, just tree-log them.
3316  */
3317 if ((ref->type == BTRFS_REF_METADATA &&
3318 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3319 (ref->type == BTRFS_REF_DATA &&
3320 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
3321 /* Pin the extent instead of queueing a delayed ref. */
3322 btrfs_pin_extent(fs_info, ref->bytenr, ref->len, 1);
3323 old_ref_mod = new_ref_mod = 0;
3324 ret = 0;
3325 } else if (ref->type == BTRFS_REF_METADATA) {
3326 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL,
3327 &old_ref_mod, &new_ref_mod);
3328 } else {
3329 ret = btrfs_add_delayed_data_ref(trans, ref, 0,
3330 &old_ref_mod, &new_ref_mod);
3331 }
3332
3333 if (!((ref->type == BTRFS_REF_METADATA &&
3334 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3335 (ref->type == BTRFS_REF_DATA &&
3336 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
3337 btrfs_ref_tree_mod(fs_info, ref);
3338
3339 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
3340 add_pinned_bytes(fs_info, ref);
3341
3342 return ret;
3343 }
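/*
 * When does btrfs_free_extent() pin bytes immediately? A sketch of the
 * final condition above: the queued drop took the head's ref mod from
 * non-negative to negative, i.e. this free is expected to be the real
 * one rather than cancelling a pending insertion (helper name is
 * illustrative):
 */
static bool free_should_pin_sketch(int ret, int old_ref_mod, int new_ref_mod)
{
	return ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0;
}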
3344
3345 enum btrfs_loop_type {
3346 LOOP_CACHING_NOWAIT,
3347 LOOP_CACHING_WAIT,
3348 LOOP_ALLOC_CHUNK,
3349 LOOP_NO_EMPTY_SIZE,
3350 };
3351
3352 static inline void
3353 btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
3354 int delalloc)
3355 {
3356 if (delalloc)
3357 down_read(&cache->data_rwsem);
3358 }
3359
3360 static inline void
3361 btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
3362 int delalloc)
3363 {
3364 btrfs_get_block_group(cache);
3365 if (delalloc)
3366 down_read(&cache->data_rwsem);
3367 }
3368
3369 static struct btrfs_block_group_cache *
3370 btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
3371 struct btrfs_free_cluster *cluster,
3372 int delalloc)
3373 {
3374 struct btrfs_block_group_cache *used_bg = NULL;
3375
3376 spin_lock(&cluster->refill_lock);
3377 while (1) {
3378 used_bg = cluster->block_group;
3379 if (!used_bg)
3380 return NULL;
3381
3382 if (used_bg == block_group)
3383 return used_bg;
3384
3385 btrfs_get_block_group(used_bg);
3386
3387 if (!delalloc)
3388 return used_bg;
3389
3390 if (down_read_trylock(&used_bg->data_rwsem))
3391 return used_bg;
3392
3393 spin_unlock(&cluster->refill_lock);
3394
3395 /* We should only ever nest one level deep here. */
3396 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
3397
3398 spin_lock(&cluster->refill_lock);
3399 if (used_bg == cluster->block_group)
3400 return used_bg;
3401
3402 up_read(&used_bg->data_rwsem);
3403 btrfs_put_block_group(used_bg);
3404 }
3405 }
3406
3407 static inline void
3408 btrfs_release_block_group(struct btrfs_block_group_cache *cache,
3409 int delalloc)
3410 {
3411 if (delalloc)
3412 up_read(&cache->data_rwsem);
3413 btrfs_put_block_group(cache);
3414 }
3415
3416 /*
3417  * Structure used internally by find_free_extent(), wrapping the
3418  * needed parameters and intermediate search state.
3419  */
3420 struct find_free_extent_ctl {
3421 /* Basic allocation info */
3422 u64 ram_bytes;
3423 u64 num_bytes;
3424 u64 empty_size;
3425 u64 flags;
3426 int delalloc;
3427
3428 /* Where to start the search inside the block group */
3429 u64 search_start;
3430
3431 /* For clustered allocation */
3432 u64 empty_cluster;
3433
3434 bool have_caching_bg;
3435 bool orig_have_caching_bg;
3436
3437 /* RAID index, converted from flags */
3438 int index;
3439
3440 /*
3441  * Current loop number; see find_free_extent_update_loop() for details.
3442  */
3443 int loop;
3444
3445 /*
3446  * Whether we're refilling a cluster; if true, re-search the current
3447  * block group but don't try to refill the cluster again.
3448  */
3449 bool retry_clustered;
3450
3451 /*
3452  * Whether we already waited on this block group's caching progress;
3453  * if true, re-search it but don't wait on it again.
3454  */
3455 bool retry_unclustered;
3456
3457 /* Whether the current block group is fully cached */
3458 int cached;
3459
3460 /* Largest contiguous hole found so far */
3461 u64 max_extent_size;
3462
3463 /* Total free space from the free space cache, not always contiguous */
3464 u64 total_free_space;
3465
3466 /* Found result */
3467 u64 found_offset;
3468 };
3469
3470 /*
3471  * Helper function for find_free_extent().
3472  *
3473  * Return -ENOENT to fall back to unclustered mode.
3474  * Return -EAGAIN to re-search this block group.
3475  * Return >0 when nothing was found.
3476  * Return 0 when a location was found; ffe_ctl->found_offset is
3477  * set accordingly.
3478  */
3479 static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
3480 struct btrfs_free_cluster *last_ptr,
3481 struct find_free_extent_ctl *ffe_ctl,
3482 struct btrfs_block_group_cache **cluster_bg_ret)
3483 {
3484 struct btrfs_block_group_cache *cluster_bg;
3485 u64 aligned_cluster;
3486 u64 offset;
3487 int ret;
3488
3489 cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
3490 if (!cluster_bg)
3491 goto refill_cluster;
3492 if (cluster_bg != bg && (cluster_bg->ro ||
3493 !block_group_bits(cluster_bg, ffe_ctl->flags)))
3494 goto release_cluster;
3495
3496 offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
3497 ffe_ctl->num_bytes, cluster_bg->key.objectid,
3498 &ffe_ctl->max_extent_size);
3499 if (offset) {
3500 /* We have a block, we're done. */
3501 spin_unlock(&last_ptr->refill_lock);
3502 trace_btrfs_reserve_extent_cluster(cluster_bg,
3503 ffe_ctl->search_start, ffe_ctl->num_bytes);
3504 *cluster_bg_ret = cluster_bg;
3505 ffe_ctl->found_offset = offset;
3506 return 0;
3507 }
3508 WARN_ON(last_ptr->block_group != cluster_bg);
3509
3510 release_cluster:
3511 /*
3512  * If we are on LOOP_NO_EMPTY_SIZE we can't set up a new cluster,
3513  * so just skip it and let the allocator find whatever block it
3514  * can find.  If we reach this point we will have tried the
3515  * cluster allocator plenty of times and not found anything, so
3516  * we are likely way too fragmented for clustering to help.
3517  *
3518  * However, if the cluster is taken from the current block group,
3519  * release the cluster first, so that we stand a better chance of
3520  * succeeding in the unclustered allocation.
3521  */
3522 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
3523 spin_unlock(&last_ptr->refill_lock);
3524 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3525 return -ENOENT;
3526 }
3527
3528 /* This cluster didn't work out, free it and start over. */
3529 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3530
3531 if (cluster_bg != bg)
3532 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3533
3534 refill_cluster:
3535 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
3536 spin_unlock(&last_ptr->refill_lock);
3537 return -ENOENT;
3538 }
3539
3540 aligned_cluster = max_t(u64,
3541 ffe_ctl->empty_cluster + ffe_ctl->empty_size,
3542 bg->full_stripe_len);
3543 ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
3544 ffe_ctl->num_bytes, aligned_cluster);
3545 if (ret == 0) {
3546 /* Now pull our allocation out of this cluster. */
3547 offset = btrfs_alloc_from_cluster(bg, last_ptr,
3548 ffe_ctl->num_bytes, ffe_ctl->search_start,
3549 &ffe_ctl->max_extent_size);
3550 if (offset) {
3551 /* We found one, proceed. */
3552 spin_unlock(&last_ptr->refill_lock);
3553 trace_btrfs_reserve_extent_cluster(bg,
3554 ffe_ctl->search_start,
3555 ffe_ctl->num_bytes);
3556 ffe_ctl->found_offset = offset;
3557 return 0;
3558 }
3559 } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
3560 !ffe_ctl->retry_clustered) {
3561 spin_unlock(&last_ptr->refill_lock);
3562
3563 ffe_ctl->retry_clustered = true;
3564 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3565 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
3566 return -EAGAIN;
3567 }
3568
3569 /*
3570  * We didn't find a cluster or couldn't allocate from the one we
3571  * have; free the cluster we've been trying to use and move on.
3572  */
3573 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3574 spin_unlock(&last_ptr->refill_lock);
3575 return 1;
3576 }
3577
3578 /*
3579  * Return >0 to inform caller that we found nothing.
3580  * Return 0 when we found a free extent and set ffe_ctl->found_offset.
3581  * Return -EAGAIN to inform caller that we need to re-search this block group.
3582  */
3583 static int find_free_extent_unclustered(struct btrfs_block_group_cache *bg,
3584 struct btrfs_free_cluster *last_ptr,
3585 struct find_free_extent_ctl *ffe_ctl)
3586 {
3587 u64 offset;
3588
3589 /*
3590  * We are doing an unclustered allocation, set the fragmented flag
3591  * so we don't bother trying to set up a cluster again until we
3592  * get more space.
3593  */
3594 if (unlikely(last_ptr)) {
3595 spin_lock(&last_ptr->lock);
3596 last_ptr->fragmented = 1;
3597 spin_unlock(&last_ptr->lock);
3598 }
3599 if (ffe_ctl->cached) {
3600 struct btrfs_free_space_ctl *free_space_ctl;
3601
3602 free_space_ctl = bg->free_space_ctl;
3603 spin_lock(&free_space_ctl->tree_lock);
3604 if (free_space_ctl->free_space <
3605 ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
3606 ffe_ctl->empty_size) {
3607 ffe_ctl->total_free_space = max_t(u64,
3608 ffe_ctl->total_free_space,
3609 free_space_ctl->free_space);
3610 spin_unlock(&free_space_ctl->tree_lock);
3611 return 1;
3612 }
3613 spin_unlock(&free_space_ctl->tree_lock);
3614 }
3615
3616 offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
3617 ffe_ctl->num_bytes, ffe_ctl->empty_size,
3618 &ffe_ctl->max_extent_size);
3619
3620 /*
3621  * If we didn't find anything, and we haven't already retried this
3622  * block group, and the block group is still in the middle of
3623  * caching, wait for the caching thread to make enough progress
3624  * for our allocation and then re-search it.
3625  *
3626  * retry_unclustered ensures we only wait on any given block group
3627  * once, so the search cannot livelock on slow caching.
3628  */
3629 if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
3630 ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
3631 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3632 ffe_ctl->empty_size);
3633 ffe_ctl->retry_unclustered = true;
3634 return -EAGAIN;
3635 } else if (!offset) {
3636 return 1;
3637 }
3638 ffe_ctl->found_offset = offset;
3639 return 0;
3640 }
3641
3642 /*
3643  * Return >0 means caller needs to re-search for a free extent.
3644  * Return 0 means we have the needed free extent.
3645  * Return <0 means we failed to locate any free extent.
3646  */
3647 static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
3648 struct btrfs_free_cluster *last_ptr,
3649 struct btrfs_key *ins,
3650 struct find_free_extent_ctl *ffe_ctl,
3651 int full_search, bool use_cluster)
3652 {
3653 struct btrfs_root *root = fs_info->extent_root;
3654 int ret;
3655
3656 if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
3657 ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
3658 ffe_ctl->orig_have_caching_bg = true;
3659
3660 if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
3661 ffe_ctl->have_caching_bg)
3662 return 1;
3663
3664 if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
3665 return 1;
3666
3667 if (ins->objectid) {
3668 if (!use_cluster && last_ptr) {
3669 spin_lock(&last_ptr->lock);
3670 last_ptr->window_start = ins->objectid;
3671 spin_unlock(&last_ptr->lock);
3672 }
3673 return 0;
3674 }
3675
3676 /*
3677  * LOOP_CACHING_NOWAIT: search partially cached block groups,
3678  * kicking caching kthreads as we go.
3679  * LOOP_CACHING_WAIT: search everything, waiting on caching bgs.
3680  * LOOP_ALLOC_CHUNK: force a chunk allocation and try again.
3681  * LOOP_NO_EMPTY_SIZE: set empty_size and empty_cluster to 0 and
3682  * try one last time.
3683  */
3684 if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
3685 ffe_ctl->index = 0;
3686 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
3687 /*
3688  * We want to skip the LOOP_CACHING_WAIT step if we don't have
3689  * any uncached block groups and we've already done a full
3690  * search through them.
3691  */
3692 if (ffe_ctl->orig_have_caching_bg || !full_search)
3693 ffe_ctl->loop = LOOP_CACHING_WAIT;
3694 else
3695 ffe_ctl->loop = LOOP_ALLOC_CHUNK;
3696 } else {
3697 ffe_ctl->loop++;
3698 }
3699
3700 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
3701 struct btrfs_trans_handle *trans;
3702 int exist = 0;
3703
3704 trans = current->journal_info;
3705 if (trans)
3706 exist = 1;
3707 else
3708 trans = btrfs_join_transaction(root);
3709
3710 if (IS_ERR(trans)) {
3711 ret = PTR_ERR(trans);
3712 return ret;
3713 }
3714
3715 ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
3716 CHUNK_ALLOC_FORCE);
3717
3718 /*
3719  * If we can't allocate a new chunk we've already looped through
3720  * at least once, so move on to the NO_EMPTY_SIZE case.
3721  */
3722
3723 if (ret == -ENOSPC)
3724 ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
3725
3726 /* Do not bail out on ENOSPC since we can do more. */
3727 if (ret < 0 && ret != -ENOSPC)
3728 btrfs_abort_transaction(trans, ret);
3729 else
3730 ret = 0;
3731 if (!exist)
3732 btrfs_end_transaction(trans);
3733 if (ret)
3734 return ret;
3735 }
3736
3737 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
3738 /*
3739  * Don't loop again if we already have no empty_size and no
3740  * empty_cluster.
3741  */
3742 if (ffe_ctl->empty_size == 0 &&
3743 ffe_ctl->empty_cluster == 0)
3744 return -ENOSPC;
3745 ffe_ctl->empty_size = 0;
3746 ffe_ctl->empty_cluster = 0;
3747 }
3748 return 1;
3749 }
3750 return -ENOSPC;
3751 }
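/*
 * The escalation driven by find_free_extent_update_loop(), sketched as a
 * tiny transition function over the btrfs_loop_type values declared
 * above (helper name is illustrative; the real code mutates
 * ffe_ctl->loop in place):
 */
static int next_alloc_loop_sketch(int loop, bool orig_have_caching_bg,
				  bool full_search)
{
	if (loop == LOOP_CACHING_NOWAIT)
		return (orig_have_caching_bg || !full_search) ?
			LOOP_CACHING_WAIT : LOOP_ALLOC_CHUNK;
	return loop + 1;	/* WAIT -> ALLOC_CHUNK -> NO_EMPTY_SIZE */
}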
3752
3753 /*
3754  * Walks the btree of allocated extents and finds a hole of a given size.
3755  * The key ins is changed to record the hole:
3756  * ins->objectid == start position
3757  * ins->flags = BTRFS_EXTENT_ITEM_KEY
3758  * ins->offset == the size of the hole.
3759  * Any available blocks before search_start are skipped.
3760  *
3761  * If there is no suitable free space, we record the max size of the
3762  * currently available free space extent.
3763  *
3764  * The overall logic and call chain:
3765  *
3766  * find_free_extent()
3767  * |- Initialize find_free_extent_ctl
3768  * |- fetch_cluster_info()
3769  * |- Loop through block groups
3770  * |  |- Get a valid block group
3771  * |  |- Try clustered allocation in that block group
3772  * |  |- Try unclustered allocation in that block group
3773  * |  |- Check if the result is valid; if so, exit
3774  * |  |- Jump to the next block group
3775  * |
3776  * |- Push harder via find_free_extent_update_loop()
3777  */
3778 static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
3779 u64 ram_bytes, u64 num_bytes, u64 empty_size,
3780 u64 hint_byte, struct btrfs_key *ins,
3781 u64 flags, int delalloc)
3782 {
3783 int ret = 0;
3784 int cache_block_group_error = 0;
3785 struct btrfs_free_cluster *last_ptr = NULL;
3786 struct btrfs_block_group_cache *block_group = NULL;
3787 struct find_free_extent_ctl ffe_ctl = {0};
3788 struct btrfs_space_info *space_info;
3789 bool use_cluster = true;
3790 bool full_search = false;
3791
3792 WARN_ON(num_bytes < fs_info->sectorsize);
3793
3794 ffe_ctl.ram_bytes = ram_bytes;
3795 ffe_ctl.num_bytes = num_bytes;
3796 ffe_ctl.empty_size = empty_size;
3797 ffe_ctl.flags = flags;
3798 ffe_ctl.search_start = 0;
3799 ffe_ctl.retry_clustered = false;
3800 ffe_ctl.retry_unclustered = false;
3801 ffe_ctl.delalloc = delalloc;
3802 ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
3803 ffe_ctl.have_caching_bg = false;
3804 ffe_ctl.orig_have_caching_bg = false;
3805 ffe_ctl.found_offset = 0;
3806
3807 ins->type = BTRFS_EXTENT_ITEM_KEY;
3808 ins->objectid = 0;
3809 ins->offset = 0;
3810
3811 trace_find_free_extent(fs_info, num_bytes, empty_size, flags);
3812
3813 space_info = btrfs_find_space_info(fs_info, flags);
3814 if (!space_info) {
3815 btrfs_err(fs_info, "No space info for %llu", flags);
3816 return -ENOSPC;
3817 }
3818
3819 /*
3820  * If our free space is heavily fragmented we may not be able to
3821  * make big contiguous allocations, so instead of doing the
3822  * expensive search, simply return ENOSPC with our max_extent_size
3823  * so the caller can retry with a smaller request and we avoid
3824  * searching for a free space extent that won't exist.
3825  *
3826  * If max_extent_size still covers our allocation, just disable
3827  * clustering, since a cluster would likely not be found anyway.
3828  */
3829 if (unlikely(space_info->max_extent_size)) {
3830 spin_lock(&space_info->lock);
3831 if (space_info->max_extent_size &&
3832 num_bytes > space_info->max_extent_size) {
3833 ins->offset = space_info->max_extent_size;
3834 spin_unlock(&space_info->lock);
3835 return -ENOSPC;
3836 } else if (space_info->max_extent_size) {
3837 use_cluster = false;
3838 }
3839 spin_unlock(&space_info->lock);
3840 }
3841
3842 last_ptr = fetch_cluster_info(fs_info, space_info,
3843 &ffe_ctl.empty_cluster);
3844 if (last_ptr) {
3845 spin_lock(&last_ptr->lock);
3846 if (last_ptr->block_group)
3847 hint_byte = last_ptr->window_start;
3848 if (last_ptr->fragmented) {
3849 /*
3850  * We still set window_start so we can keep track of the last
3851  * place we found an allocation to try and reuse, but fragmented
3852  * clusters disable clustered allocation.
3853  */
3854 hint_byte = last_ptr->window_start;
3855 use_cluster = false;
3856 }
3857 spin_unlock(&last_ptr->lock);
3858 }
3859
3860 ffe_ctl.search_start = max(ffe_ctl.search_start,
3861 first_logical_byte(fs_info, 0));
3862 ffe_ctl.search_start = max(ffe_ctl.search_start, hint_byte);
3863 if (ffe_ctl.search_start == hint_byte) {
3864 block_group = btrfs_lookup_block_group(fs_info,
3865 ffe_ctl.search_start);
3866 /*
3867  * We don't want to use the block group if it doesn't match our
3868  * allocation bits, or if it's not cached.
3869  *
3870  * However, if we are re-searching with an ideal block group
3871  * picked out, we don't care that it's not cached.
3872  */
3873 if (block_group && block_group_bits(block_group, flags) &&
3874 block_group->cached != BTRFS_CACHE_NO) {
3875 down_read(&space_info->groups_sem);
3876 if (list_empty(&block_group->list) ||
3877 block_group->ro) {
3878 /*
3879  * Someone is removing this block group: we can't jump to the
3880  * have_block_group label because our list pointers would no
3881  * longer be valid, so drop our reference and fall back to the
3882  * full search below.
3883  */
3884 btrfs_put_block_group(block_group);
3885 up_read(&space_info->groups_sem);
3886 } else {
3887 ffe_ctl.index = btrfs_bg_flags_to_raid_index(
3888 block_group->flags);
3889 btrfs_lock_block_group(block_group, delalloc);
3890 goto have_block_group;
3891 }
3892 } else if (block_group) {
3893 btrfs_put_block_group(block_group);
3894 }
3895 }
3896 search:
3897 ffe_ctl.have_caching_bg = false;
3898 if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
3899 ffe_ctl.index == 0)
3900 full_search = true;
3901 down_read(&space_info->groups_sem);
3902 list_for_each_entry(block_group,
3903 &space_info->block_groups[ffe_ctl.index], list) {
3904 /* Read-only block groups can't satisfy an allocation, skip them. */
3905 if (unlikely(block_group->ro))
3906 continue;
3907
3908 btrfs_grab_block_group(block_group, delalloc);
3909 ffe_ctl.search_start = block_group->key.objectid;
3910
3911 /*
3912  * This can happen if we end up cycling through all the raid
3913  * types, but we want to make sure we only allocate for the
3914  * proper type.
3915  */
3916 if (!block_group_bits(block_group, flags)) {
3917 u64 extra = BTRFS_BLOCK_GROUP_DUP |
3918 BTRFS_BLOCK_GROUP_RAID1_MASK |
3919 BTRFS_BLOCK_GROUP_RAID56_MASK |
3920 BTRFS_BLOCK_GROUP_RAID10;
3921
3922 /*
3923  * If they asked for extra copies and this block group
3924  * doesn't provide them, bail.  This does allow us to fill
3925  * raid0 from raid1.
3926  */
3927 if ((flags & extra) && !(block_group->flags & extra))
3928 goto loop;
3929
3930 /*
3931  * This block group has different flags than we want.  It's
3932  * possible that we have MIXED_GROUP flag but no block group
3933  * is mixed.  Just skip such a block group.
3934  */
3935 btrfs_release_block_group(block_group, delalloc);
3936 continue;
3937 }
3938
3939 have_block_group:
3940 ffe_ctl.cached = btrfs_block_group_cache_done(block_group);
3941 if (unlikely(!ffe_ctl.cached)) {
3942 ffe_ctl.have_caching_bg = true;
3943 ret = btrfs_cache_block_group(block_group, 0);
3944
3945 /*
3946  * If we get ENOMEM here or something else we want to try other
3947  * block groups, because it may not be fatal.  However if we
3948  * can't find anything else we need to save our return here so
3949  * that we return the actual error that caused problems, not
3950  * ENOSPC.
3951  */
3952 if (ret < 0) {
3953 if (!cache_block_group_error)
3954 cache_block_group_error = ret;
3955 ret = 0;
3956 goto loop;
3957 }
3958 ret = 0;
3959 }
3960
3961 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
3962 goto loop;
3963
3964 /*
3965  * OK, we want to try and use the cluster allocator, so
3966  * let's look there.
3967  */
3968 if (last_ptr && use_cluster) {
3969 struct btrfs_block_group_cache *cluster_bg = NULL;
3970
3971 ret = find_free_extent_clustered(block_group, last_ptr,
3972 &ffe_ctl, &cluster_bg);
3973
3974 if (ret == 0) {
3975 if (cluster_bg && cluster_bg != block_group) {
3976 btrfs_release_block_group(block_group,
3977 delalloc);
3978 block_group = cluster_bg;
3979 }
3980 goto checks;
3981 } else if (ret == -EAGAIN) {
3982 goto have_block_group;
3983 } else if (ret > 0) {
3984 goto loop;
3985 }
3986 /* ret == -ENOENT: fall through to unclustered allocation below. */
3987 }
3988
3989 ret = find_free_extent_unclustered(block_group, last_ptr,
3990 &ffe_ctl);
3991 if (ret == -EAGAIN)
3992 goto have_block_group;
3993 else if (ret > 0)
3994 goto loop;
3995
3996 checks:
3997 ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
3998 fs_info->stripesize);
3999
4000 /* Move on to the next group. */
4001 if (ffe_ctl.search_start + num_bytes >
4002 block_group->key.objectid + block_group->key.offset) {
4003 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
4004 num_bytes);
4005 goto loop;
4006 }
4007
4008 if (ffe_ctl.found_offset < ffe_ctl.search_start)
4009 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
4010 ffe_ctl.search_start - ffe_ctl.found_offset);
4011
4012 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
4013 num_bytes, delalloc);
4014 if (ret == -EAGAIN) {
4015 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
4016 num_bytes);
4017 goto loop;
4018 }
4019 btrfs_inc_block_group_reservations(block_group);
4020
4021 /* We are all good, let's return. */
4022 ins->objectid = ffe_ctl.search_start;
4023 ins->offset = num_bytes;
4024
4025 trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
4026 num_bytes);
4027 btrfs_release_block_group(block_group, delalloc);
4028 break;
4029 loop:
4030 ffe_ctl.retry_clustered = false;
4031 ffe_ctl.retry_unclustered = false;
4032 BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
4033 ffe_ctl.index);
4034 btrfs_release_block_group(block_group, delalloc);
4035 cond_resched();
4036 }
4037 up_read(&space_info->groups_sem);
4038
4039 ret = find_free_extent_update_loop(fs_info, last_ptr, ins, &ffe_ctl,
4040 full_search, use_cluster);
4041 if (ret > 0)
4042 goto search;
4043
4044 if (ret == -ENOSPC && !cache_block_group_error) {
4045 /*
4046  * Use ffe_ctl.total_free_space as a fallback if we couldn't set
4047  * max_extent_size, otherwise we may report a false ENOSPC size.
4048  */
4049 if (!ffe_ctl.max_extent_size)
4050 ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
4051 spin_lock(&space_info->lock);
4052 space_info->max_extent_size = ffe_ctl.max_extent_size;
4053 spin_unlock(&space_info->lock);
4054 ins->offset = ffe_ctl.max_extent_size;
4055 } else if (ret == -ENOSPC) {
4056 ret = cache_block_group_error;
4057 }
4058 return ret;
4059 }
4060
4061 /*
4062  * btrfs_reserve_extent - entry point to the extent allocator. Tries to find
4063  *			   a hole that is at least as big as @num_bytes.
4064  *
4065  * @root           - The root that will contain this extent.
4066  *
4067  * @ram_bytes      - The amount of space in ram that @num_bytes takes. This
4068  *                   is used for accounting purposes. This value differs
4069  *                   from @num_bytes only in the case of compressed extents.
4070  *
4071  * @num_bytes      - Number of bytes to allocate on-disk.
4072  *
4073  * @min_alloc_size - Indicates the minimum amount of space that the
4074  *                   allocator should try to satisfy. In some cases
4075  *                   @num_bytes may be larger than what is required and if
4076  *                   the filesystem is fragmented then allocation fails.
4077  *                   However, the presence of @min_alloc_size gives a
4078  *                   chance to try and satisfy the smaller allocation.
4079  *
4080  * @empty_size     - A hint that you plan on doing more COW. This is the
4081  *                   size in bytes the allocator should try to find free
4082  *                   next to the block it returns.  This is just a hint and
4083  *                   may be ignored by the allocator.
4084  *
4085  * @hint_byte      - Hint to the allocator to start searching above the byte
4086  *                   address passed. It might be ignored.
4087  *
4088  * @ins            - This key is modified to record the found hole. It will
4089  *                   have the following values:
4090  *                   ins->objectid == start position
4091  *                   ins->flags = BTRFS_EXTENT_ITEM_KEY
4092  *                   ins->offset == the size of the hole.
4093  *
4094  * @is_data        - Boolean flag indicating whether an extent is
4095  *                   allocated for data (true) or metadata (false).
4096  *
4097  * @delalloc       - Boolean flag indicating whether this allocation is for
4098  *                   delalloc or not. If 'true' the data_rwsem of block
4099  *                   groups is going to be acquired.
4100  *
4101  * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
4102  * case -ENOSPC is returned then @ins->offset will contain the size of the
4103  * largest available hole the allocator managed to find.
4104  */
4105
4106 int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
4107 u64 num_bytes, u64 min_alloc_size,
4108 u64 empty_size, u64 hint_byte,
4109 struct btrfs_key *ins, int is_data, int delalloc)
4110 {
4111 struct btrfs_fs_info *fs_info = root->fs_info;
4112 bool final_tried = num_bytes == min_alloc_size;
4113 u64 flags;
4114 int ret;
4115
4116 flags = get_alloc_profile_by_root(root, is_data);
4117 again:
4118 WARN_ON(num_bytes < fs_info->sectorsize);
4119 ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
4120 hint_byte, ins, flags, delalloc);
4121 if (!ret && !is_data) {
4122 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
4123 } else if (ret == -ENOSPC) {
4124 if (!final_tried && ins->offset) {
4125 num_bytes = min(num_bytes >> 1, ins->offset);
4126 num_bytes = round_down(num_bytes,
4127 fs_info->sectorsize);
4128 num_bytes = max(num_bytes, min_alloc_size);
4129 ram_bytes = num_bytes;
4130 if (num_bytes == min_alloc_size)
4131 final_tried = true;
4132 goto again;
4133 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4134 struct btrfs_space_info *sinfo;
4135
4136 sinfo = btrfs_find_space_info(fs_info, flags);
4137 btrfs_err(fs_info,
4138 "allocation failed flags %llu, wanted %llu",
4139 flags, num_bytes);
4140 if (sinfo)
4141 btrfs_dump_space_info(fs_info, sinfo,
4142 num_bytes, 1);
4143 }
4144 }
4145
4146 return ret;
4147 }
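/*
 * The ENOSPC retry above halves the request, clamps it to the largest
 * hole the allocator reported (ins->offset, nonzero on this path),
 * rounds down to the sector size and never goes below min_alloc_size.
 * A sketch assuming a power-of-two sectorsize (helper is illustrative):
 */
static u64 next_alloc_size_sketch(u64 num_bytes, u64 largest_hole,
				  u64 min_alloc_size, u64 sectorsize)
{
	u64 n = num_bytes >> 1;

	if (largest_hole < n)
		n = largest_hole;
	n &= ~(sectorsize - 1);		/* round_down to sector */
	return n > min_alloc_size ? n : min_alloc_size;
}
/* E.g. an 8 MiB request with a 3 MiB largest hole retries at 3 MiB. */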
4148
4149 static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
4150 u64 start, u64 len,
4151 int pin, int delalloc)
4152 {
4153 struct btrfs_block_group_cache *cache;
4154 int ret = 0;
4155
4156 cache = btrfs_lookup_block_group(fs_info, start);
4157 if (!cache) {
4158 btrfs_err(fs_info, "Unable to find block group for %llu",
4159 start);
4160 return -ENOSPC;
4161 }
4162
4163 if (pin)
4164 pin_down_extent(cache, start, len, 1);
4165 else {
4166 if (btrfs_test_opt(fs_info, DISCARD))
4167 ret = btrfs_discard_extent(fs_info, start, len, NULL);
4168 btrfs_add_free_space(cache, start, len);
4169 btrfs_free_reserved_bytes(cache, len, delalloc);
4170 trace_btrfs_reserved_extent_free(fs_info, start, len);
4171 }
4172
4173 btrfs_put_block_group(cache);
4174 return ret;
4175 }
4176
4177 int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
4178 u64 start, u64 len, int delalloc)
4179 {
4180 return __btrfs_free_reserved_extent(fs_info, start, len, 0, delalloc);
4181 }
4182
4183 int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
4184 u64 start, u64 len)
4185 {
4186 return __btrfs_free_reserved_extent(fs_info, start, len, 1, 0);
4187 }
4188
4189 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4190 u64 parent, u64 root_objectid,
4191 u64 flags, u64 owner, u64 offset,
4192 struct btrfs_key *ins, int ref_mod)
4193 {
4194 struct btrfs_fs_info *fs_info = trans->fs_info;
4195 int ret;
4196 struct btrfs_extent_item *extent_item;
4197 struct btrfs_extent_inline_ref *iref;
4198 struct btrfs_path *path;
4199 struct extent_buffer *leaf;
4200 int type;
4201 u32 size;
4202
4203 if (parent > 0)
4204 type = BTRFS_SHARED_DATA_REF_KEY;
4205 else
4206 type = BTRFS_EXTENT_DATA_REF_KEY;
4207
4208 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
4209
4210 path = btrfs_alloc_path();
4211 if (!path)
4212 return -ENOMEM;
4213
4214 path->leave_spinning = 1;
4215 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4216 ins, size);
4217 if (ret) {
4218 btrfs_free_path(path);
4219 return ret;
4220 }
4221
4222 leaf = path->nodes[0];
4223 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4224 struct btrfs_extent_item);
4225 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
4226 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4227 btrfs_set_extent_flags(leaf, extent_item,
4228 flags | BTRFS_EXTENT_FLAG_DATA);
4229
4230 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4231 btrfs_set_extent_inline_ref_type(leaf, iref, type);
4232 if (parent > 0) {
4233 struct btrfs_shared_data_ref *ref;
4234 ref = (struct btrfs_shared_data_ref *)(iref + 1);
4235 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
4236 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
4237 } else {
4238 struct btrfs_extent_data_ref *ref;
4239 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
4240 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
4241 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
4242 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
4243 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
4244 }
4245
4246 btrfs_mark_buffer_dirty(path->nodes[0]);
4247 btrfs_free_path(path);
4248
4249 ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
4250 if (ret)
4251 return ret;
4252
4253 ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1);
4254 if (ret) {
4255 btrfs_err(fs_info, "update block group failed for %llu %llu",
4256 ins->objectid, ins->offset);
4257 BUG();
4258 }
4259 trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
4260 return ret;
4261 }
4262
4263 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
4264 struct btrfs_delayed_ref_node *node,
4265 struct btrfs_delayed_extent_op *extent_op)
4266 {
4267 struct btrfs_fs_info *fs_info = trans->fs_info;
4268 int ret;
4269 struct btrfs_extent_item *extent_item;
4270 struct btrfs_key extent_key;
4271 struct btrfs_tree_block_info *block_info;
4272 struct btrfs_extent_inline_ref *iref;
4273 struct btrfs_path *path;
4274 struct extent_buffer *leaf;
4275 struct btrfs_delayed_tree_ref *ref;
4276 u32 size = sizeof(*extent_item) + sizeof(*iref);
4277 u64 num_bytes;
4278 u64 flags = extent_op->flags_to_set;
4279 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4280
4281 ref = btrfs_delayed_node_to_tree_ref(node);
4282
4283 extent_key.objectid = node->bytenr;
4284 if (skinny_metadata) {
4285 extent_key.offset = ref->level;
4286 extent_key.type = BTRFS_METADATA_ITEM_KEY;
4287 num_bytes = fs_info->nodesize;
4288 } else {
4289 extent_key.offset = node->num_bytes;
4290 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4291 size += sizeof(*block_info);
4292 num_bytes = node->num_bytes;
4293 }
4294
4295 path = btrfs_alloc_path();
4296 if (!path)
4297 return -ENOMEM;
4298
4299 path->leave_spinning = 1;
4300 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4301 &extent_key, size);
4302 if (ret) {
4303 btrfs_free_path(path);
4304 return ret;
4305 }
4306
4307 leaf = path->nodes[0];
4308 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4309 struct btrfs_extent_item);
4310 btrfs_set_extent_refs(leaf, extent_item, 1);
4311 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4312 btrfs_set_extent_flags(leaf, extent_item,
4313 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
4314
4315 if (skinny_metadata) {
4316 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4317 } else {
4318 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
4319 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
4320 btrfs_set_tree_block_level(leaf, block_info, ref->level);
4321 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
4322 }
4323
4324 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
4325 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4326 btrfs_set_extent_inline_ref_type(leaf, iref,
4327 BTRFS_SHARED_BLOCK_REF_KEY);
4328 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
4329 } else {
4330 btrfs_set_extent_inline_ref_type(leaf, iref,
4331 BTRFS_TREE_BLOCK_REF_KEY);
4332 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
4333 }
4334
4335 btrfs_mark_buffer_dirty(leaf);
4336 btrfs_free_path(path);
4337
4338 ret = remove_from_free_space_tree(trans, extent_key.objectid,
4339 num_bytes);
4340 if (ret)
4341 return ret;
4342
4343 ret = btrfs_update_block_group(trans, extent_key.objectid,
4344 fs_info->nodesize, 1);
4345 if (ret) {
4346 btrfs_err(fs_info, "update block group failed for %llu %llu",
4347 extent_key.objectid, extent_key.offset);
4348 BUG();
4349 }
4350
4351 trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
4352 fs_info->nodesize);
4353 return ret;
4354 }
4355
4356 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4357 struct btrfs_root *root, u64 owner,
4358 u64 offset, u64 ram_bytes,
4359 struct btrfs_key *ins)
4360 {
4361 struct btrfs_ref generic_ref = { 0 };
4362 int ret;
4363
4364 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4365
4366 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4367 ins->objectid, ins->offset, 0);
4368 btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
4369 btrfs_ref_tree_mod(root->fs_info, &generic_ref);
4370 ret = btrfs_add_delayed_data_ref(trans, &generic_ref,
4371 ram_bytes, NULL, NULL);
4372 return ret;
4373 }
4374
4375 /*
4376  * This is used by the tree logging recovery code.  It records
4377  * that an extent has been allocated and makes sure to clear the
4378  * free space cache bits as well.
4379  */
4380 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4381 u64 root_objectid, u64 owner, u64 offset,
4382 struct btrfs_key *ins)
4383 {
4384 struct btrfs_fs_info *fs_info = trans->fs_info;
4385 int ret;
4386 struct btrfs_block_group_cache *block_group;
4387 struct btrfs_space_info *space_info;
4388
4389 /*
4390  * Mixed block groups will exclude before processing the log so
4391  * we only need to do the exclude dance if this fs isn't mixed.
4392  */
4393 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
4394 ret = __exclude_logged_extent(fs_info, ins->objectid,
4395 ins->offset);
4396 if (ret)
4397 return ret;
4398 }
4399
4400 block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
4401 if (!block_group)
4402 return -EINVAL;
4403
4404 space_info = block_group->space_info;
4405 spin_lock(&space_info->lock);
4406 spin_lock(&block_group->lock);
4407 space_info->bytes_reserved += ins->offset;
4408 block_group->reserved += ins->offset;
4409 spin_unlock(&block_group->lock);
4410 spin_unlock(&space_info->lock);
4411
4412 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
4413 offset, ins, 1);
4414 if (ret)
4415 btrfs_pin_extent(fs_info, ins->objectid, ins->offset, 1);
4416 btrfs_put_block_group(block_group);
4417 return ret;
4418 }
4419
4420 static struct extent_buffer *
4421 btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4422 u64 bytenr, int level, u64 owner)
4423 {
4424 struct btrfs_fs_info *fs_info = root->fs_info;
4425 struct extent_buffer *buf;
4426
4427 buf = btrfs_find_create_tree_block(fs_info, bytenr);
4428 if (IS_ERR(buf))
4429 return buf;
4430
4431 /*
4432 * Extra safety check in case the extent tree is corrupted and the
4433 * extent allocator chooses a tree block which is already used and
4434 * locked.
4435 */
4436 if (buf->lock_owner == current->pid) {
4437 btrfs_err_rl(fs_info,
4438 "tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
4439 buf->start, btrfs_header_owner(buf), current->pid);
4440 free_extent_buffer(buf);
4441 return ERR_PTR(-EUCLEAN);
4442 }
4443
4444 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
4445 btrfs_tree_lock(buf);
4446 btrfs_clean_tree_block(buf);
4447 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
4448
4449 btrfs_set_lock_blocking_write(buf);
4450 set_extent_buffer_uptodate(buf);
4451
4452 memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
4453 btrfs_set_header_level(buf, level);
4454 btrfs_set_header_bytenr(buf, buf->start);
4455 btrfs_set_header_generation(buf, trans->transid);
4456 btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
4457 btrfs_set_header_owner(buf, owner);
4458 write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
4459 write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
4460 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4461 buf->log_index = root->log_transid % 2;
4462 /*
4463 * we allow two log transactions at a time, use different
4464 * EXTENT bits to differentiate dirty pages.
4465 */
4466 if (buf->log_index == 0)
4467 set_extent_dirty(&root->dirty_log_pages, buf->start,
4468 buf->start + buf->len - 1, GFP_NOFS);
4469 else
4470 set_extent_new(&root->dirty_log_pages, buf->start,
4471 buf->start + buf->len - 1);
4472 } else {
4473 buf->log_index = -1;
4474 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4475 buf->start + buf->len - 1, GFP_NOFS);
4476 }
4477 trans->dirty = true;
4478
4479 return buf;
4480 }
4481
4482 /*
4483 * finds a free extent and does all the dirty work required for allocation,
4484 * returns the tree buffer or an ERR_PTR on error.
4485 */
4486 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
4487 struct btrfs_root *root,
4488 u64 parent, u64 root_objectid,
4489 const struct btrfs_disk_key *key,
4490 int level, u64 hint,
4491 u64 empty_size)
4492 {
4493 struct btrfs_fs_info *fs_info = root->fs_info;
4494 struct btrfs_key ins;
4495 struct btrfs_block_rsv *block_rsv;
4496 struct extent_buffer *buf;
4497 struct btrfs_delayed_extent_op *extent_op;
4498 struct btrfs_ref generic_ref = { 0 };
4499 u64 flags = 0;
4500 int ret;
4501 u32 blocksize = fs_info->nodesize;
4502 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4503
4504 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4505 if (btrfs_is_testing(fs_info)) {
4506 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
4507 level, root_objectid);
4508 if (!IS_ERR(buf))
4509 root->alloc_bytenr += blocksize;
4510 return buf;
4511 }
4512 #endif
4513
4514 block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
4515 if (IS_ERR(block_rsv))
4516 return ERR_CAST(block_rsv);
4517
4518 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
4519 empty_size, hint, &ins, 0, 0);
4520 if (ret)
4521 goto out_unuse;
4522
4523 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
4524 root_objectid);
4525 if (IS_ERR(buf)) {
4526 ret = PTR_ERR(buf);
4527 goto out_free_reserved;
4528 }
4529
4530 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4531 if (parent == 0)
4532 parent = ins.objectid;
4533 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4534 } else
4535 BUG_ON(parent > 0);
4536
4537 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4538 extent_op = btrfs_alloc_delayed_extent_op();
4539 if (!extent_op) {
4540 ret = -ENOMEM;
4541 goto out_free_buf;
4542 }
4543 if (key)
4544 memcpy(&extent_op->key, key, sizeof(extent_op->key));
4545 else
4546 memset(&extent_op->key, 0, sizeof(extent_op->key));
4547 extent_op->flags_to_set = flags;
4548 extent_op->update_key = skinny_metadata ? false : true;
4549 extent_op->update_flags = true;
4550 extent_op->is_data = false;
4551 extent_op->level = level;
4552
4553 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4554 ins.objectid, ins.offset, parent);
4555 generic_ref.real_root = root->root_key.objectid;
4556 btrfs_init_tree_ref(&generic_ref, level, root_objectid);
4557 btrfs_ref_tree_mod(fs_info, &generic_ref);
4558 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref,
4559 extent_op, NULL, NULL);
4560 if (ret)
4561 goto out_free_delayed;
4562 }
4563 return buf;
4564
4565 out_free_delayed:
4566 btrfs_free_delayed_extent_op(extent_op);
4567 out_free_buf:
4568 free_extent_buffer(buf);
4569 out_free_reserved:
4570 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
4571 out_unuse:
4572 btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
4573 return ERR_PTR(ret);
4574 }
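
/*
 * Illustrative sketch (hypothetical helper, not part of this file): a minimal
 * caller of btrfs_alloc_tree_block(), roughly what the COW path does.  The
 * zeroed key and parent == 0 (no shared/full-backref parent) are assumptions
 * for illustration; real callers derive them from the block being COWed.
 */
static struct extent_buffer *example_alloc_node(struct btrfs_trans_handle *trans,
						struct btrfs_root *root,
						int level)
{
	struct btrfs_disk_key key = { 0 };

	/* Returns a locked, uptodate, dirty buffer or an ERR_PTR. */
	return btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
				      &key, level, 0, 0);
}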
4575
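/*
 * State for walking a tree during snapshot deletion.  The walk runs in two
 * stages: DROP_REFERENCE drops the references this root holds, while
 * UPDATE_BACKREF converts implicit back references to explicit ones when a
 * shared subtree is found.  drop_progress/drop_level record how far we got
 * so that an interrupted drop can be resumed.
 */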
4576 struct walk_control {
4577 u64 refs[BTRFS_MAX_LEVEL];
4578 u64 flags[BTRFS_MAX_LEVEL];
4579 struct btrfs_key update_progress;
4580 struct btrfs_key drop_progress;
4581 int drop_level;
4582 int stage;
4583 int level;
4584 int shared_level;
4585 int update_ref;
4586 int keep_locks;
4587 int reada_slot;
4588 int reada_count;
4589 int restarted;
4590 };
4591
4592 #define DROP_REFERENCE 1
4593 #define UPDATE_BACKREF 2
4594
4595 static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4596 struct btrfs_root *root,
4597 struct walk_control *wc,
4598 struct btrfs_path *path)
4599 {
4600 struct btrfs_fs_info *fs_info = root->fs_info;
4601 u64 bytenr;
4602 u64 generation;
4603 u64 refs;
4604 u64 flags;
4605 u32 nritems;
4606 struct btrfs_key key;
4607 struct extent_buffer *eb;
4608 int ret;
4609 int slot;
4610 int nread = 0;
4611
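/*
 * Adapt the readahead window: shrink it if the previous batch has not been
 * fully consumed yet, otherwise grow it, capped at one full node's worth of
 * pointers.
 */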
4612 if (path->slots[wc->level] < wc->reada_slot) {
4613 wc->reada_count = wc->reada_count * 2 / 3;
4614 wc->reada_count = max(wc->reada_count, 2);
4615 } else {
4616 wc->reada_count = wc->reada_count * 3 / 2;
4617 wc->reada_count = min_t(int, wc->reada_count,
4618 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
4619 }
4620
4621 eb = path->nodes[wc->level];
4622 nritems = btrfs_header_nritems(eb);
4623
4624 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
4625 if (nread >= wc->reada_count)
4626 break;
4627
4628 cond_resched();
4629 bytenr = btrfs_node_blockptr(eb, slot);
4630 generation = btrfs_node_ptr_generation(eb, slot);
4631
4632 if (slot == path->slots[wc->level])
4633 goto reada;
4634
4635 if (wc->stage == UPDATE_BACKREF &&
4636 generation <= root->root_key.offset)
4637 continue;
4638
4639 /* We don't lock the tree block, it's OK to be racy here */
4640 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
4641 wc->level - 1, 1, &refs,
4642 &flags);
4643 /* We don't care about errors in readahead. */
4644 if (ret < 0)
4645 continue;
4646 BUG_ON(refs == 0);
4647
4648 if (wc->stage == DROP_REFERENCE) {
4649 if (refs == 1)
4650 goto reada;
4651
4652 if (wc->level == 1 &&
4653 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4654 continue;
4655 if (!wc->update_ref ||
4656 generation <= root->root_key.offset)
4657 continue;
4658 btrfs_node_key_to_cpu(eb, &key, slot);
4659 ret = btrfs_comp_cpu_keys(&key,
4660 &wc->update_progress);
4661 if (ret < 0)
4662 continue;
4663 } else {
4664 if (wc->level == 1 &&
4665 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4666 continue;
4667 }
4668 reada:
4669 readahead_tree_block(fs_info, bytenr);
4670 nread++;
4671 }
4672 wc->reada_slot = slot;
4673 }
4674
4675 /*
4676 * helper to process tree block while walking down the tree.
4677 *
4678 * when wc->stage == UPDATE_BACKREF, this function updates
4679 * back refs for pointers in the block.
4680 *
4681 * NOTE: return value 1 means we should stop walking down.
4682 */
4683 static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4684 struct btrfs_root *root,
4685 struct btrfs_path *path,
4686 struct walk_control *wc, int lookup_info)
4687 {
4688 struct btrfs_fs_info *fs_info = root->fs_info;
4689 int level = wc->level;
4690 struct extent_buffer *eb = path->nodes[level];
4691 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4692 int ret;
4693
4694 if (wc->stage == UPDATE_BACKREF &&
4695 btrfs_header_owner(eb) != root->root_key.objectid)
4696 return 1;
4697
4698 /*
4699 * when the reference count of a tree block is 1, it won't increase
4700 * again.  once the full backref flag is set, we never clear it.
4701 */
4702 if (lookup_info &&
4703 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
4704 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
4705 BUG_ON(!path->locks[level]);
4706 ret = btrfs_lookup_extent_info(trans, fs_info,
4707 eb->start, level, 1,
4708 &wc->refs[level],
4709 &wc->flags[level]);
4710 BUG_ON(ret == -ENOMEM);
4711 if (ret)
4712 return ret;
4713 BUG_ON(wc->refs[level] == 0);
4714 }
4715
4716 if (wc->stage == DROP_REFERENCE) {
4717 if (wc->refs[level] > 1)
4718 return 1;
4719
4720 if (path->locks[level] && !wc->keep_locks) {
4721 btrfs_tree_unlock_rw(eb, path->locks[level]);
4722 path->locks[level] = 0;
4723 }
4724 return 0;
4725 }
4726
4727 /* wc->stage == UPDATE_BACKREF */
4728 if (!(wc->flags[level] & flag)) {
4729 BUG_ON(!path->locks[level]);
4730 ret = btrfs_inc_ref(trans, root, eb, 1);
4731 BUG_ON(ret);
4732 ret = btrfs_dec_ref(trans, root, eb, 0);
4733 BUG_ON(ret);
4734 ret = btrfs_set_disk_extent_flags(trans, eb->start,
4735 eb->len, flag,
4736 btrfs_header_level(eb), 0);
4737 BUG_ON(ret);
4738 wc->flags[level] |= flag;
4739 }
4740
4741 /*
4742 * the block is shared by multiple trees, so it's not good to
4743 * keep the tree lock
4744 */
4745 if (path->locks[level] && level > 0) {
4746 btrfs_tree_unlock_rw(eb, path->locks[level]);
4747 path->locks[level] = 0;
4748 }
4749 return 0;
4750 }
4751
4752 /*
4753 * Verify that a ref for this root exists for the given block.  Used when
4754 * resuming an interrupted snapshot delete with a stale drop_progress key.
4755 */
4756 static int check_ref_exists(struct btrfs_trans_handle *trans,
4757 struct btrfs_root *root, u64 bytenr, u64 parent,
4758 int level)
4759 {
4760 struct btrfs_path *path;
4761 struct btrfs_extent_inline_ref *iref;
4762 int ret;
4763
4764 path = btrfs_alloc_path();
4765 if (!path)
4766 return -ENOMEM;
4767
4768 ret = lookup_extent_backref(trans, path, &iref, bytenr,
4769 root->fs_info->nodesize, parent,
4770 root->root_key.objectid, level, 0);
4771 btrfs_free_path(path);
4772 if (ret == -ENOENT)
4773 return 0;
4774 if (ret < 0)
4775 return ret;
4776 return 1;
4777 }
4778
4779 /*
4780 * helper to process tree block pointer.
4781 *
4782 * when wc->stage == DROP_REFERENCE, this function checks
4783 * reference count of the block pointed to. if the block
4784 * is shared and we need to update back refs for the subtree
4785 * rooted at the block, this function changes wc->stage to
4786 * UPDATE_BACKREF. if the block is shared and there is no
4787 * need to update back refs, this function drops reference
4788 * for the block.
4789 *
4790 * NOTE: return value 1 means we should stop walking down.
4791 */
4792 static noinline int do_walk_down(struct btrfs_trans_handle *trans,
4793 struct btrfs_root *root,
4794 struct btrfs_path *path,
4795 struct walk_control *wc, int *lookup_info)
4796 {
4797 struct btrfs_fs_info *fs_info = root->fs_info;
4798 u64 bytenr;
4799 u64 generation;
4800 u64 parent;
4801 struct btrfs_key key;
4802 struct btrfs_key first_key;
4803 struct btrfs_ref ref = { 0 };
4804 struct extent_buffer *next;
4805 int level = wc->level;
4806 int reada = 0;
4807 int ret = 0;
4808 bool need_account = false;
4809
4810 generation = btrfs_node_ptr_generation(path->nodes[level],
4811 path->slots[level]);
4812
4813 /*
4814 * if the lower level block was created before the snapshot was created,
4815 * we know there is no need to update back refs for the subtree
4816 */
4817 if (wc->stage == UPDATE_BACKREF &&
4818 generation <= root->root_key.offset) {
4819 *lookup_info = 1;
4820 return 1;
4821 }
4822
4823 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
4824 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
4825 path->slots[level]);
4826
4827 next = find_extent_buffer(fs_info, bytenr);
4828 if (!next) {
4829 next = btrfs_find_create_tree_block(fs_info, bytenr);
4830 if (IS_ERR(next))
4831 return PTR_ERR(next);
4832
4833 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
4834 level - 1);
4835 reada = 1;
4836 }
4837 btrfs_tree_lock(next);
4838 btrfs_set_lock_blocking_write(next);
4839
4840 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
4841 &wc->refs[level - 1],
4842 &wc->flags[level - 1]);
4843 if (ret < 0)
4844 goto out_unlock;
4845
4846 if (unlikely(wc->refs[level - 1] == 0)) {
4847 btrfs_err(fs_info, "Missing references.");
4848 ret = -EIO;
4849 goto out_unlock;
4850 }
4851 *lookup_info = 0;
4852
4853 if (wc->stage == DROP_REFERENCE) {
4854 if (wc->refs[level - 1] > 1) {
4855 need_account = true;
4856 if (level == 1 &&
4857 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4858 goto skip;
4859
4860 if (!wc->update_ref ||
4861 generation <= root->root_key.offset)
4862 goto skip;
4863
4864 btrfs_node_key_to_cpu(path->nodes[level], &key,
4865 path->slots[level]);
4866 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
4867 if (ret < 0)
4868 goto skip;
4869
4870 wc->stage = UPDATE_BACKREF;
4871 wc->shared_level = level - 1;
4872 }
4873 } else {
4874 if (level == 1 &&
4875 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4876 goto skip;
4877 }
4878
4879 if (!btrfs_buffer_uptodate(next, generation, 0)) {
4880 btrfs_tree_unlock(next);
4881 free_extent_buffer(next);
4882 next = NULL;
4883 *lookup_info = 1;
4884 }
4885
4886 if (!next) {
4887 if (reada && level == 1)
4888 reada_walk_down(trans, root, wc, path);
4889 next = read_tree_block(fs_info, bytenr, generation, level - 1,
4890 &first_key);
4891 if (IS_ERR(next)) {
4892 return PTR_ERR(next);
4893 } else if (!extent_buffer_uptodate(next)) {
4894 free_extent_buffer(next);
4895 return -EIO;
4896 }
4897 btrfs_tree_lock(next);
4898 btrfs_set_lock_blocking_write(next);
4899 }
4900
4901 level--;
4902 ASSERT(level == btrfs_header_level(next));
4903 if (level != btrfs_header_level(next)) {
4904 btrfs_err(root->fs_info, "mismatched level");
4905 ret = -EIO;
4906 goto out_unlock;
4907 }
4908 path->nodes[level] = next;
4909 path->slots[level] = 0;
4910 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
4911 wc->level = level;
4912 if (wc->level == 1)
4913 wc->reada_slot = 0;
4914 return 0;
4915 skip:
4916 wc->refs[level - 1] = 0;
4917 wc->flags[level - 1] = 0;
4918 if (wc->stage == DROP_REFERENCE) {
4919 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
4920 parent = path->nodes[level]->start;
4921 } else {
4922 ASSERT(root->root_key.objectid ==
4923 btrfs_header_owner(path->nodes[level]));
4924 if (root->root_key.objectid !=
4925 btrfs_header_owner(path->nodes[level])) {
4926 btrfs_err(root->fs_info,
4927 "mismatched block owner");
4928 ret = -EIO;
4929 goto out_unlock;
4930 }
4931 parent = 0;
4932 }
4933
4934 /*
4935 * If we had a drop_progress we need to verify the refs are set
4936 * as expected.  If we find our ref then we know that from here
4937 * on out everything should be correct, and we can clear the
4938 * ->restarted flag.
4939 */
4940 if (wc->restarted) {
4941 ret = check_ref_exists(trans, root, bytenr, parent,
4942 level - 1);
4943 if (ret < 0)
4944 goto out_unlock;
4945 if (ret == 0)
4946 goto no_delete;
4947 ret = 0;
4948 wc->restarted = 0;
4949 }
4950
4951 /*
4952 * Reloc tree doesn't contribute to qgroup numbers, and we have
4953 * already accounted them at merge time (replace_path),
4954 * thus we could skip expensive subtree trace here.
4955 */
4956 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
4957 need_account) {
4958 ret = btrfs_qgroup_trace_subtree(trans, next,
4959 generation, level - 1);
4960 if (ret) {
4961 btrfs_err_rl(fs_info,
4962 "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
4963 ret);
4964 }
4965 }
4966
4967 /*
4968 * We need to update the next key in our walk control so we can
4969 * update the drop_progress key accordingly.  We don't care if
4970 * find_next_key doesn't find a key because that means we're at
4971 * the end and are going to clean up now.
4972 */
4973 wc->drop_level = level;
4974 find_next_key(path, level, &wc->drop_progress);
4975
4976 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
4977 fs_info->nodesize, parent);
4978 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
4979 ret = btrfs_free_extent(trans, &ref);
4980 if (ret)
4981 goto out_unlock;
4982 }
4983 no_delete:
4984 *lookup_info = 1;
4985 ret = 1;
4986
4987 out_unlock:
4988 btrfs_tree_unlock(next);
4989 free_extent_buffer(next);
4990
4991 return ret;
4992 }
4993
4994 /*
4995 * helper to process tree block while walking up the tree.
4996 *
4997 * when wc->stage == DROP_REFERENCE, this function drops
4998 * reference count on the block.
4999 *
5000 * when wc->stage == UPDATE_BACKREF, this function changes
5001 * wc->stage back to DROP_REFERENCE if we changed wc->stage
5002 * to UPDATE_BACKREF previously while processing the block.
5003 *
5004 * NOTE: return value 1 means we should stop walking up.
5005 */
5006 static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
5007 struct btrfs_root *root,
5008 struct btrfs_path *path,
5009 struct walk_control *wc)
5010 {
5011 struct btrfs_fs_info *fs_info = root->fs_info;
5012 int ret;
5013 int level = wc->level;
5014 struct extent_buffer *eb = path->nodes[level];
5015 u64 parent = 0;
5016
5017 if (wc->stage == UPDATE_BACKREF) {
5018 BUG_ON(wc->shared_level < level);
5019 if (level < wc->shared_level)
5020 goto out;
5021
5022 ret = find_next_key(path, level + 1, &wc->update_progress);
5023 if (ret > 0)
5024 wc->update_ref = 0;
5025
5026 wc->stage = DROP_REFERENCE;
5027 wc->shared_level = -1;
5028 path->slots[level] = 0;
5029
5030 /*
5031 * check reference count again if the block isn't locked.
5032 * we should start walking down the tree again if reference
5033 * count is one.
5034 */
5035 if (!path->locks[level]) {
5036 BUG_ON(level == 0);
5037 btrfs_tree_lock(eb);
5038 btrfs_set_lock_blocking_write(eb);
5039 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5040
5041 ret = btrfs_lookup_extent_info(trans, fs_info,
5042 eb->start, level, 1,
5043 &wc->refs[level],
5044 &wc->flags[level]);
5045 if (ret < 0) {
5046 btrfs_tree_unlock_rw(eb, path->locks[level]);
5047 path->locks[level] = 0;
5048 return ret;
5049 }
5050 BUG_ON(wc->refs[level] == 0);
5051 if (wc->refs[level] == 1) {
5052 btrfs_tree_unlock_rw(eb, path->locks[level]);
5053 path->locks[level] = 0;
5054 return 1;
5055 }
5056 }
5057 }
5058
5059 /* wc->stage == DROP_REFERENCE */
5060 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
5061
5062 if (wc->refs[level] == 1) {
5063 if (level == 0) {
5064 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5065 ret = btrfs_dec_ref(trans, root, eb, 1);
5066 else
5067 ret = btrfs_dec_ref(trans, root, eb, 0);
5068 BUG_ON(ret);
5069 if (is_fstree(root->root_key.objectid)) {
5070 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
5071 if (ret) {
5072 btrfs_err_rl(fs_info,
5073 "error %d accounting leaf items, quota is out of sync, rescan required",
5074 ret);
5075 }
5076 }
5077 }
5078
5079 if (!path->locks[level] &&
5080 btrfs_header_generation(eb) == trans->transid) {
5081 btrfs_tree_lock(eb);
5082 btrfs_set_lock_blocking_write(eb);
5083 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5084 }
5085 btrfs_clean_tree_block(eb);
5086 }
5087
5088 if (eb == root->node) {
5089 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5090 parent = eb->start;
5091 else if (root->root_key.objectid != btrfs_header_owner(eb))
5092 goto owner_mismatch;
5093 } else {
5094 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5095 parent = path->nodes[level + 1]->start;
5096 else if (root->root_key.objectid !=
5097 btrfs_header_owner(path->nodes[level + 1]))
5098 goto owner_mismatch;
5099 }
5100
5101 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
5102 out:
5103 wc->refs[level] = 0;
5104 wc->flags[level] = 0;
5105 return 0;
5106
5107 owner_mismatch:
5108 btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
5109 btrfs_header_owner(eb), root->root_key.objectid);
5110 return -EUCLEAN;
5111 }
5112
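/*
 * Walk down the tree, processing each node with walk_down_proc() and
 * stepping into child blocks via do_walk_down(), until we reach a leaf or a
 * block that must not be descended into.
 */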
5113 static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5114 struct btrfs_root *root,
5115 struct btrfs_path *path,
5116 struct walk_control *wc)
5117 {
5118 int level = wc->level;
5119 int lookup_info = 1;
5120 int ret;
5121
5122 while (level >= 0) {
5123 ret = walk_down_proc(trans, root, path, wc, lookup_info);
5124 if (ret > 0)
5125 break;
5126
5127 if (level == 0)
5128 break;
5129
5130 if (path->slots[level] >=
5131 btrfs_header_nritems(path->nodes[level]))
5132 break;
5133
5134 ret = do_walk_down(trans, root, path, wc, &lookup_info);
5135 if (ret > 0) {
5136 path->slots[level]++;
5137 continue;
5138 } else if (ret < 0)
5139 return ret;
5140 level = wc->level;
5141 }
5142 return 0;
5143 }
5144
5145 static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
5146 struct btrfs_root *root,
5147 struct btrfs_path *path,
5148 struct walk_control *wc, int max_level)
5149 {
5150 int level = wc->level;
5151 int ret;
5152
5153 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
5154 while (level < max_level && path->nodes[level]) {
5155 wc->level = level;
5156 if (path->slots[level] + 1 <
5157 btrfs_header_nritems(path->nodes[level])) {
5158 path->slots[level]++;
5159 return 0;
5160 } else {
5161 ret = walk_up_proc(trans, root, path, wc);
5162 if (ret > 0)
5163 return 0;
5164 if (ret < 0)
5165 return ret;
5166
5167 if (path->locks[level]) {
5168 btrfs_tree_unlock_rw(path->nodes[level],
5169 path->locks[level]);
5170 path->locks[level] = 0;
5171 }
5172 free_extent_buffer(path->nodes[level]);
5173 path->nodes[level] = NULL;
5174 level++;
5175 }
5176 }
5177 return 1;
5178 }
5179
5180 /*
5181 * drop a subvolume tree.
5182 *
5183 * this function traverses the tree freeing any blocks that are only
5184 * referenced by the tree.
5185 *
5186 * when a shared tree block is found, this function decreases its
5187 * reference count by one.  if update_ref is true, this function
5188 * also makes sure backrefs for the shared block and all lower level
5189 * blocks are properly updated.
5190 *
5191 * If called with for_reloc == 0, may exit early with -EAGAIN
5192 */
5193 int btrfs_drop_snapshot(struct btrfs_root *root,
5194 struct btrfs_block_rsv *block_rsv, int update_ref,
5195 int for_reloc)
5196 {
5197 struct btrfs_fs_info *fs_info = root->fs_info;
5198 struct btrfs_path *path;
5199 struct btrfs_trans_handle *trans;
5200 struct btrfs_root *tree_root = fs_info->tree_root;
5201 struct btrfs_root_item *root_item = &root->root_item;
5202 struct walk_control *wc;
5203 struct btrfs_key key;
5204 int err = 0;
5205 int ret;
5206 int level;
5207 bool root_dropped = false;
5208
5209 btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
5210
5211 path = btrfs_alloc_path();
5212 if (!path) {
5213 err = -ENOMEM;
5214 goto out;
5215 }
5216
5217 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5218 if (!wc) {
5219 btrfs_free_path(path);
5220 err = -ENOMEM;
5221 goto out;
5222 }
5223
5224 trans = btrfs_start_transaction(tree_root, 0);
5225 if (IS_ERR(trans)) {
5226 err = PTR_ERR(trans);
5227 goto out_free;
5228 }
5229
5230 err = btrfs_run_delayed_items(trans);
5231 if (err)
5232 goto out_end_trans;
5233
5234 if (block_rsv)
5235 trans->block_rsv = block_rsv;
5236
5237 /*
5238 * This will help us catch people modifying the fs tree while we're
5239 * dropping it.  It is unsafe to mess with the fs tree while it's being
5240 * dropped as we unlock the root node and parent nodes as we walk down
5241 * the tree, assuming nothing will change.  If something does change
5242 * then we'll have stale information and drop references to blocks
5243 * we've already dropped.
5244 */
5245 set_bit(BTRFS_ROOT_DELETING, &root->state);
5246 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5247 level = btrfs_header_level(root->node);
5248 path->nodes[level] = btrfs_lock_root_node(root);
5249 btrfs_set_lock_blocking_write(path->nodes[level]);
5250 path->slots[level] = 0;
5251 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5252 memset(&wc->update_progress, 0,
5253 sizeof(wc->update_progress));
5254 } else {
5255 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5256 memcpy(&wc->update_progress, &key,
5257 sizeof(wc->update_progress));
5258
5259 level = root_item->drop_level;
5260 BUG_ON(level == 0);
5261 path->lowest_level = level;
5262 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5263 path->lowest_level = 0;
5264 if (ret < 0) {
5265 err = ret;
5266 goto out_end_trans;
5267 }
5268 WARN_ON(ret > 0);
5269
5270 /*
5271 * unlock our path, this is safe because only this
5272 * function is allowed to delete this snapshot
5273 */
5274 btrfs_unlock_up_safe(path, 0);
5275
5276 level = btrfs_header_level(root->node);
5277 while (1) {
5278 btrfs_tree_lock(path->nodes[level]);
5279 btrfs_set_lock_blocking_write(path->nodes[level]);
5280 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5281
5282 ret = btrfs_lookup_extent_info(trans, fs_info,
5283 path->nodes[level]->start,
5284 level, 1, &wc->refs[level],
5285 &wc->flags[level]);
5286 if (ret < 0) {
5287 err = ret;
5288 goto out_end_trans;
5289 }
5290 BUG_ON(wc->refs[level] == 0);
5291
5292 if (level == root_item->drop_level)
5293 break;
5294
5295 btrfs_tree_unlock(path->nodes[level]);
5296 path->locks[level] = 0;
5297 WARN_ON(wc->refs[level] != 1);
5298 level--;
5299 }
5300 }
5301
5302 wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
5303 wc->level = level;
5304 wc->shared_level = -1;
5305 wc->stage = DROP_REFERENCE;
5306 wc->update_ref = update_ref;
5307 wc->keep_locks = 0;
5308 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5309
5310 while (1) {
5311
5312 ret = walk_down_tree(trans, root, path, wc);
5313 if (ret < 0) {
5314 err = ret;
5315 break;
5316 }
5317
5318 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
5319 if (ret < 0) {
5320 err = ret;
5321 break;
5322 }
5323
5324 if (ret > 0) {
5325 BUG_ON(wc->stage != DROP_REFERENCE);
5326 break;
5327 }
5328
5329 if (wc->stage == DROP_REFERENCE) {
5330 wc->drop_level = wc->level;
5331 btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
5332 &wc->drop_progress,
5333 path->slots[wc->drop_level]);
5334 }
5335 btrfs_cpu_key_to_disk(&root_item->drop_progress,
5336 &wc->drop_progress);
5337 root_item->drop_level = wc->drop_level;
5338
5339 BUG_ON(wc->level == 0);
5340 if (btrfs_should_end_transaction(trans) ||
5341 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
5342 ret = btrfs_update_root(trans, tree_root,
5343 &root->root_key,
5344 root_item);
5345 if (ret) {
5346 btrfs_abort_transaction(trans, ret);
5347 err = ret;
5348 goto out_end_trans;
5349 }
5350
5351 btrfs_end_transaction_throttle(trans);
5352 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
5353 btrfs_debug(fs_info,
5354 "drop snapshot early exit");
5355 err = -EAGAIN;
5356 goto out_free;
5357 }
5358
5359 trans = btrfs_start_transaction(tree_root, 0);
5360 if (IS_ERR(trans)) {
5361 err = PTR_ERR(trans);
5362 goto out_free;
5363 }
5364 if (block_rsv)
5365 trans->block_rsv = block_rsv;
5366 }
5367 }
5368 btrfs_release_path(path);
5369 if (err)
5370 goto out_end_trans;
5371
5372 ret = btrfs_del_root(trans, &root->root_key);
5373 if (ret) {
5374 btrfs_abort_transaction(trans, ret);
5375 err = ret;
5376 goto out_end_trans;
5377 }
5378
5379 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
5380 ret = btrfs_find_root(tree_root, &root->root_key, path,
5381 NULL, NULL);
5382 if (ret < 0) {
5383 btrfs_abort_transaction(trans, ret);
5384 err = ret;
5385 goto out_end_trans;
5386 } else if (ret > 0) {
5387 /*
5388 * if we fail to delete the orphan item this time around,
5389 * it'll get picked up the next time.
5390 * The most common failure here is just -ENOENT.
5391 */
5392 btrfs_del_orphan_item(trans, tree_root,
5393 root->root_key.objectid);
5394 }
5395 }
5396
5397 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
5398 btrfs_add_dropped_root(trans, root);
5399 } else {
5400 free_extent_buffer(root->node);
5401 free_extent_buffer(root->commit_root);
5402 btrfs_put_fs_root(root);
5403 }
5404 root_dropped = true;
5405 out_end_trans:
5406 btrfs_end_transaction_throttle(trans);
5407 out_free:
5408 kfree(wc);
5409 btrfs_free_path(path);
5410 out:
5411 /*
5412 * So if we need to stop dropping the snapshot for whatever reason we
5413 * need to make sure to add it back to the dead root list so that we
5414 * keep trying to do the work later.  This also cleans up roots if we
5415 * don't have it in the radix (like when we recover after a power fail
5416 * or unmount) so we don't leak memory.
5417 */
5418 if (!for_reloc && !root_dropped)
5419 btrfs_add_dead_root(root);
5420 if (err && err != -EAGAIN)
5421 btrfs_handle_fs_error(fs_info, err, NULL);
5422 return err;
5423 }
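
/*
 * Illustrative sketch (hypothetical helper, modeled on the cleaner thread):
 * roots predating mixed backrefs must be dropped without updating backrefs,
 * newer roots with update_ref == 1.
 */
static int example_drop_dead_root(struct btrfs_root *root)
{
	if (btrfs_header_backref_rev(root->node) < BTRFS_MIXED_BACKREF_REV)
		return btrfs_drop_snapshot(root, NULL, 0, 0);
	return btrfs_drop_snapshot(root, NULL, 1, 0);
}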
5424
5425 /*
5426 * drop subtree rooted at tree block 'node'.
5427 *
5428 * NOTE: this function will unlock and release tree block 'node'
5429 * only used by relocation code
5430 */
5431 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5432 struct btrfs_root *root,
5433 struct extent_buffer *node,
5434 struct extent_buffer *parent)
5435 {
5436 struct btrfs_fs_info *fs_info = root->fs_info;
5437 struct btrfs_path *path;
5438 struct walk_control *wc;
5439 int level;
5440 int parent_level;
5441 int ret = 0;
5442 int wret;
5443
5444 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5445
5446 path = btrfs_alloc_path();
5447 if (!path)
5448 return -ENOMEM;
5449
5450 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5451 if (!wc) {
5452 btrfs_free_path(path);
5453 return -ENOMEM;
5454 }
5455
5456 btrfs_assert_tree_locked(parent);
5457 parent_level = btrfs_header_level(parent);
5458 extent_buffer_get(parent);
5459 path->nodes[parent_level] = parent;
5460 path->slots[parent_level] = btrfs_header_nritems(parent);
5461
5462 btrfs_assert_tree_locked(node);
5463 level = btrfs_header_level(node);
5464 path->nodes[level] = node;
5465 path->slots[level] = 0;
5466 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5467
5468 wc->refs[parent_level] = 1;
5469 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5470 wc->level = level;
5471 wc->shared_level = -1;
5472 wc->stage = DROP_REFERENCE;
5473 wc->update_ref = 0;
5474 wc->keep_locks = 1;
5475 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5476
5477 while (1) {
5478 wret = walk_down_tree(trans, root, path, wc);
5479 if (wret < 0) {
5480 ret = wret;
5481 break;
5482 }
5483
5484 wret = walk_up_tree(trans, root, path, wc, parent_level);
5485 if (wret < 0)
5486 ret = wret;
5487 if (wret != 0)
5488 break;
5489 }
5490
5491 kfree(wc);
5492 btrfs_free_path(path);
5493 return ret;
5494 }
5495
5496 /*
5497 * helper to account the unused space of all the readonly block groups in
5498 * the space_info.  takes mirrors into account.
5499 */
5500 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
5501 {
5502 struct btrfs_block_group_cache *block_group;
5503 u64 free_bytes = 0;
5504 int factor;
5505
5506 /* It's df, we don't care if it's racy */
5507 if (list_empty(&sinfo->ro_bgs))
5508 return 0;
5509
5510 spin_lock(&sinfo->lock);
5511 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
5512 spin_lock(&block_group->lock);
5513
5514 if (!block_group->ro) {
5515 spin_unlock(&block_group->lock);
5516 continue;
5517 }
5518
5519 factor = btrfs_bg_type_to_factor(block_group->flags);
5520 free_bytes += (block_group->key.offset -
5521 btrfs_block_group_used(&block_group->item)) *
5522 factor;
5523
5524 spin_unlock(&block_group->lock);
5525 }
5526 spin_unlock(&sinfo->lock);
5527
5528 return free_bytes;
5529 }
5530
5531 int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
5532 u64 start, u64 end)
5533 {
5534 return unpin_extent_range(fs_info, start, end, false);
5535 }
5536
5537 /*
5538 * It used to be that old block groups would be left around forever.
5539 * Iterating over them would be enough to trim unused space.  Since we
5540 * now automatically remove them, we also need to iterate over unallocated
5541 * space.
5542 *
5543 * We don't want a transaction for this since the discard may take a
5544 * substantial amount of time.  We don't require that a transaction be
5545 * running, but we do need to take a running transaction into account
5546 * to ensure that we're not discarding chunks that were released or
5547 * allocated in the current transaction.
5548 *
5549 * Holding the chunks lock will prevent other threads from allocating
5550 * or releasing chunks, but it won't prevent a running transaction
5551 * from committing and releasing the memory that the pending chunks
5552 * list head uses.  For that, we need to take a reference to the
5553 * transaction and hold the commit root sem.  We only need to hold
5554 * it while performing the free space search since we have already
5555 * held back allocations.
5556 */
5557 static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
5558 {
5559 u64 start = SZ_1M, len = 0, end = 0;
5560 int ret;
5561
5562 *trimmed = 0;
5563
5564 /* Discard not supported = nothing to do. */
5565 if (!blk_queue_discard(bdev_get_queue(device->bdev)))
5566 return 0;
5567
5568 /* Not writable = nothing to do. */
5569 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
5570 return 0;
5571
5572 /* No free space = nothing to do. */
5573 if (device->total_bytes <= device->bytes_used)
5574 return 0;
5575
5576 ret = 0;
5577
5578 while (1) {
5579 struct btrfs_fs_info *fs_info = device->fs_info;
5580 u64 bytes;
5581
5582 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
5583 if (ret)
5584 break;
5585
5586 find_first_clear_extent_bit(&device->alloc_state, start,
5587 &start, &end,
5588 CHUNK_TRIMMED | CHUNK_ALLOCATED);
5589
5590 /* Ensure we skip the reserved area in the first 1M */
5591 start = max_t(u64, start, SZ_1M);
5592
5593 /*
5594 * If find_first_clear_extent_bit finds a range that spans the end
5595 * of the device it will set end to -1, in this case it's up to the
5596 * caller to trim the value to the size of the device.
5597 */
5598 end = min(end, device->total_bytes - 1);
5599
5600 len = end - start + 1;
5601
5602 /* We didn't find any extents */
5603 if (!len) {
5604 mutex_unlock(&fs_info->chunk_mutex);
5605 ret = 0;
5606 break;
5607 }
5608
5609 ret = btrfs_issue_discard(device->bdev, start, len,
5610 &bytes);
5611 if (!ret)
5612 set_extent_bits(&device->alloc_state, start,
5613 start + bytes - 1,
5614 CHUNK_TRIMMED);
5615 mutex_unlock(&fs_info->chunk_mutex);
5616
5617 if (ret)
5618 break;
5619
5620 start += len;
5621 *trimmed += bytes;
5622
5623 if (fatal_signal_pending(current)) {
5624 ret = -ERESTARTSYS;
5625 break;
5626 }
5627
5628 cond_resched();
5629 }
5630
5631 return ret;
5632 }
5633
5634 /*
5635 * Trim the whole filesystem by:
5636 * 1) trimming the free space in each block group
5637 * 2) trimming the unallocated space on each device
5638 *
5639 * This will also continue trimming even if a block group or device encounters
5640 * an error.  The return value will be the last error, or 0 if nothing bad
5641 * happens.
5642 */
5643 int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
5644 {
5645 struct btrfs_block_group_cache *cache = NULL;
5646 struct btrfs_device *device;
5647 struct list_head *devices;
5648 u64 group_trimmed;
5649 u64 range_end = U64_MAX;
5650 u64 start;
5651 u64 end;
5652 u64 trimmed = 0;
5653 u64 bg_failed = 0;
5654 u64 dev_failed = 0;
5655 int bg_ret = 0;
5656 int dev_ret = 0;
5657 int ret = 0;
5658
5659 /*
5660 * Check range overflow if range->len is set.
5661 * The default range->len is U64_MAX.
5662 */
5663 if (range->len != U64_MAX &&
5664 check_add_overflow(range->start, range->len, &range_end))
5665 return -EINVAL;
5666
5667 cache = btrfs_lookup_first_block_group(fs_info, range->start);
5668 for (; cache; cache = btrfs_next_block_group(cache)) {
5669 if (cache->key.objectid >= range_end) {
5670 btrfs_put_block_group(cache);
5671 break;
5672 }
5673
5674 start = max(range->start, cache->key.objectid);
5675 end = min(range_end, cache->key.objectid + cache->key.offset);
5676
5677 if (end - start >= range->minlen) {
5678 if (!btrfs_block_group_cache_done(cache)) {
5679 ret = btrfs_cache_block_group(cache, 0);
5680 if (ret) {
5681 bg_failed++;
5682 bg_ret = ret;
5683 continue;
5684 }
5685 ret = btrfs_wait_block_group_cache_done(cache);
5686 if (ret) {
5687 bg_failed++;
5688 bg_ret = ret;
5689 continue;
5690 }
5691 }
5692 ret = btrfs_trim_block_group(cache,
5693 &group_trimmed,
5694 start,
5695 end,
5696 range->minlen);
5697
5698 trimmed += group_trimmed;
5699 if (ret) {
5700 bg_failed++;
5701 bg_ret = ret;
5702 continue;
5703 }
5704 }
5705 }
5706
5707 if (bg_failed)
5708 btrfs_warn(fs_info,
5709 "failed to trim %llu block group(s), last error %d",
5710 bg_failed, bg_ret);
5711 mutex_lock(&fs_info->fs_devices->device_list_mutex);
5712 devices = &fs_info->fs_devices->devices;
5713 list_for_each_entry(device, devices, dev_list) {
5714 ret = btrfs_trim_free_extents(device, &group_trimmed);
5715 if (ret) {
5716 dev_failed++;
5717 dev_ret = ret;
5718 break;
5719 }
5720
5721 trimmed += group_trimmed;
5722 }
5723 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5724
5725 if (dev_failed)
5726 btrfs_warn(fs_info,
5727 "failed to trim %llu device(s), last error %d",
5728 dev_failed, dev_ret);
5729 range->len = trimmed;
5730 if (bg_ret)
5731 return bg_ret;
5732 return dev_ret;
5733 }
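
/*
 * Illustrative sketch (hypothetical wrapper, not part of this file): how an
 * FITRIM-style caller drives btrfs_trim_fs() with a struct fstrim_range.
 * On success, range.len is updated to the number of bytes trimmed.
 */
static int example_trim_everything(struct btrfs_fs_info *fs_info)
{
	struct fstrim_range range = {
		.start = 0,
		.len = U64_MAX,	/* whole filesystem */
		.minlen = 0,	/* trim free extents of any size */
	};
	int ret;

	ret = btrfs_trim_fs(fs_info, &range);
	if (!ret)
		btrfs_info(fs_info, "trimmed %llu bytes", range.len);
	return ret;
}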
5734
5735 /*
5736 * btrfs_{start,end}_write_no_snapshotting() are similar to
5737 * mnt_{want,drop}_write().  They are used to prevent some tasks from
5738 * writing data into the page cache through nocow before the subvolume
5739 * is snapshotted, but to flush the data into disk after the snapshot
5740 * creation, or to prevent nocow writes while the subvolume is being
5741 * snapshotted.
5742 */
5743 void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
5744 {
5745 percpu_counter_dec(&root->subv_writers->counter);
5746 cond_wake_up(&root->subv_writers->wait);
5747 }
5748
5749 int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
5750 {
5751 if (atomic_read(&root->will_be_snapshotted))
5752 return 0;
5753
5754 percpu_counter_inc(&root->subv_writers->counter);
5755 /*
5756 * Make sure counter is updated before we check for snapshot creation.
5757 */
5758 smp_mb();
5759 if (atomic_read(&root->will_be_snapshotted)) {
5760 btrfs_end_write_no_snapshotting(root);
5761 return 0;
5762 }
5763 return 1;
5764 }
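
/*
 * Illustrative sketch (hypothetical helper, modeled on the nocow write
 * path): take the no-snapshotting guard around a nocow write and fall back
 * to COW when a snapshot is pending.
 */
static bool example_try_nocow_write(struct btrfs_root *root)
{
	if (!btrfs_start_write_no_snapshotting(root))
		return false;	/* snapshot pending: caller must COW instead */

	/* ... perform the nocow write while snapshots are held off ... */

	btrfs_end_write_no_snapshotting(root);
	return true;
}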
5765
5766 void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
5767 {
5768 while (true) {
5769 int ret;
5770
5771 ret = btrfs_start_write_no_snapshotting(root);
5772 if (ret)
5773 break;
5774 wait_var_event(&root->will_be_snapshotted,
5775 !atomic_read(&root->will_be_snapshotted));
5776 }
5777 }