This source file includes the following definitions:
- extent_state_in_tree
- btrfs_leak_debug_add
- btrfs_leak_debug_del
- btrfs_leak_debug_check
- __btrfs_debug_check_extent_io_range
- add_extent_changeset
- submit_one_bio
- end_write_bio
- flush_write_bio
- extent_io_init
- extent_io_exit
- extent_io_tree_init
- extent_io_tree_release
- alloc_extent_state
- free_extent_state
- tree_insert
- __etree_search
- tree_search_for_insert
- tree_search
- merge_state
- insert_state
- split_state
- next_state
- clear_state_bit
- alloc_extent_state_atomic
- extent_io_tree_panic
- __clear_extent_bit
- wait_on_state
- wait_extent_bit
- set_state_bits
- cache_state_if_flags
- cache_state
- __set_extent_bit
- set_extent_bit
- convert_extent_bit
- set_record_extent_bits
- set_extent_bits_nowait
- clear_extent_bit
- clear_record_extent_bits
- lock_extent_bits
- try_lock_extent
- extent_range_clear_dirty_for_io
- extent_range_redirty_for_io
- find_first_extent_bit_state
- find_first_extent_bit
- find_first_clear_extent_bit
- find_delalloc_range
- __unlock_for_delalloc
- lock_delalloc_pages
- find_lock_delalloc_range
- __process_pages_contig
- extent_clear_unlock_delalloc
- count_range_bits
- set_state_failrec
- get_state_failrec
- test_range_bit
- check_page_uptodate
- free_io_failure
- repair_io_failure
- btrfs_repair_eb_io_failure
- clean_io_failure
- btrfs_free_io_failure_record
- btrfs_get_io_failure_record
- btrfs_check_repairable
- btrfs_create_repair_bio
- bio_readpage_error
- end_extent_writepage
- end_bio_extent_writepage
- endio_readpage_release_extent
- end_bio_extent_readpage
- btrfs_io_bio_init
- btrfs_bio_alloc
- btrfs_bio_clone
- btrfs_io_bio_alloc
- btrfs_bio_clone_partial
- submit_extent_page
- attach_extent_buffer_page
- set_page_extent_mapped
- __get_extent_map
- __do_readpage
- contiguous_readpages
- __extent_read_full_page
- extent_read_full_page
- update_nr_written
- writepage_delalloc
- __extent_writepage_io
- __extent_writepage
- wait_on_extent_buffer_writeback
- end_extent_buffer_writeback
- lock_extent_buffer_for_io
- set_btree_ioerr
- end_bio_extent_buffer_writepage
- write_one_eb
- btree_write_cache_pages
- extent_write_cache_pages
- extent_write_full_page
- extent_write_locked_range
- extent_writepages
- extent_readpages
- extent_invalidatepage
- try_release_extent_state
- try_release_extent_mapping
- get_extent_skip_holes
- emit_fiemap_extent
- emit_last_fiemap_cache
- extent_fiemap
- __free_extent_buffer
- extent_buffer_under_io
- btrfs_release_extent_buffer_pages
- btrfs_release_extent_buffer
- __alloc_extent_buffer
- btrfs_clone_extent_buffer
- __alloc_dummy_extent_buffer
- alloc_dummy_extent_buffer
- check_buffer_tree_ref
- mark_extent_buffer_accessed
- find_extent_buffer
- alloc_test_extent_buffer
- alloc_extent_buffer
- btrfs_release_extent_buffer_rcu
- release_extent_buffer
- free_extent_buffer
- free_extent_buffer_stale
- clear_extent_buffer_dirty
- set_extent_buffer_dirty
- clear_extent_buffer_uptodate
- set_extent_buffer_uptodate
- read_extent_buffer_pages
- read_extent_buffer
- read_extent_buffer_to_user
- map_private_extent_buffer
- memcmp_extent_buffer
- write_extent_buffer_chunk_tree_uuid
- write_extent_buffer_fsid
- write_extent_buffer
- memzero_extent_buffer
- copy_extent_buffer_full
- copy_extent_buffer
- eb_bitmap_offset
- extent_buffer_test_bit
- extent_buffer_bitmap_set
- extent_buffer_bitmap_clear
- areas_overlap
- copy_pages
- memcpy_extent_buffer
- memmove_extent_buffer
- try_release_extent_buffer
1 // SPDX-License-Identifier: GPL-2.0
2
3 #include <linux/bitops.h>
4 #include <linux/slab.h>
5 #include <linux/bio.h>
6 #include <linux/mm.h>
7 #include <linux/pagemap.h>
8 #include <linux/page-flags.h>
9 #include <linux/spinlock.h>
10 #include <linux/blkdev.h>
11 #include <linux/swap.h>
12 #include <linux/writeback.h>
13 #include <linux/pagevec.h>
14 #include <linux/prefetch.h>
15 #include <linux/cleancache.h>
16 #include "extent_io.h"
17 #include "extent_map.h"
18 #include "ctree.h"
19 #include "btrfs_inode.h"
20 #include "volumes.h"
21 #include "check-integrity.h"
22 #include "locking.h"
23 #include "rcu-string.h"
24 #include "backref.h"
25 #include "disk-io.h"
26
27 static struct kmem_cache *extent_state_cache;
28 static struct kmem_cache *extent_buffer_cache;
29 static struct bio_set btrfs_bioset;
30
31 static inline bool extent_state_in_tree(const struct extent_state *state)
32 {
33 return !RB_EMPTY_NODE(&state->rb_node);
34 }
35
36 #ifdef CONFIG_BTRFS_DEBUG
37 static LIST_HEAD(buffers);
38 static LIST_HEAD(states);
39
40 static DEFINE_SPINLOCK(leak_lock);
41
42 static inline
43 void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
44 {
45 unsigned long flags;
46
47 spin_lock_irqsave(&leak_lock, flags);
48 list_add(new, head);
49 spin_unlock_irqrestore(&leak_lock, flags);
50 }
51
52 static inline
53 void btrfs_leak_debug_del(struct list_head *entry)
54 {
55 unsigned long flags;
56
57 spin_lock_irqsave(&leak_lock, flags);
58 list_del(entry);
59 spin_unlock_irqrestore(&leak_lock, flags);
60 }
61
62 static inline
63 void btrfs_leak_debug_check(void)
64 {
65 struct extent_state *state;
66 struct extent_buffer *eb;
67
68 while (!list_empty(&states)) {
69 state = list_entry(states.next, struct extent_state, leak_list);
70 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
71 state->start, state->end, state->state,
72 extent_state_in_tree(state),
73 refcount_read(&state->refs));
74 list_del(&state->leak_list);
75 kmem_cache_free(extent_state_cache, state);
76 }
77
78 while (!list_empty(&buffers)) {
79 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
80 pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n",
81 eb->start, eb->len, atomic_read(&eb->refs), eb->bflags);
82 list_del(&eb->leak_list);
83 kmem_cache_free(extent_buffer_cache, eb);
84 }
85 }
86
87 #define btrfs_debug_check_extent_io_range(tree, start, end) \
88 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
89 static inline void __btrfs_debug_check_extent_io_range(const char *caller,
90 struct extent_io_tree *tree, u64 start, u64 end)
91 {
92 struct inode *inode = tree->private_data;
93 u64 isize;
94
95 if (!inode || !is_data_inode(inode))
96 return;
97
98 isize = i_size_read(inode);
99 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
100 btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
101 "%s: ino %llu isize %llu odd range [%llu,%llu]",
102 caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
103 }
104 }
105 #else
106 #define btrfs_leak_debug_add(new, head) do {} while (0)
107 #define btrfs_leak_debug_del(entry) do {} while (0)
108 #define btrfs_leak_debug_check() do {} while (0)
109 #define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
110 #endif
111
112 struct tree_entry {
113 u64 start;
114 u64 end;
115 struct rb_node rb_node;
116 };
117
118 struct extent_page_data {
119 struct bio *bio;
120 struct extent_io_tree *tree;
121 /* tells writepage not to lock the state bits for this range,
122 * it still does the unlocking
123 */
124 unsigned int extent_locked:1;
125
126 /* tells the submit_bio code to use REQ_SYNC */
127 unsigned int sync_io:1;
128 };
129
130 static int add_extent_changeset(struct extent_state *state, unsigned bits,
131 struct extent_changeset *changeset,
132 int set)
133 {
134 int ret;
135
136 if (!changeset)
137 return 0;
138 if (set && (state->state & bits) == bits)
139 return 0;
140 if (!set && (state->state & bits) == 0)
141 return 0;
142 changeset->bytes_changed += state->end - state->start + 1;
143 ret = ulist_add(&changeset->range_changed, state->start, state->end,
144 GFP_ATOMIC);
145 return ret;
146 }
147
148 static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
149 unsigned long bio_flags)
150 {
151 blk_status_t ret = 0;
152 struct extent_io_tree *tree = bio->bi_private;
153
154 bio->bi_private = NULL;
155
156 if (tree->ops)
157 ret = tree->ops->submit_bio_hook(tree->private_data, bio,
158 mirror_num, bio_flags);
159 else
160 btrfsic_submit_bio(bio);
161
162 return blk_status_to_errno(ret);
163 }
164
165 /* End any unsubmitted bio in @epd with the given error status */
166 static void end_write_bio(struct extent_page_data *epd, int ret)
167 {
168 if (epd->bio) {
169 epd->bio->bi_status = errno_to_blk_status(ret);
170 bio_endio(epd->bio);
171 epd->bio = NULL;
172 }
173 }
174
175 /*
176 * Submit the bio held in extent page data via submit_one_bio.
177 *
178 * Return 0 if everything is OK.
179 * Return <0 for error.
180 */
181 static int __must_check flush_write_bio(struct extent_page_data *epd)
182 {
183 int ret = 0;
184
185 if (epd->bio) {
186 ret = submit_one_bio(epd->bio, 0, 0);
187 /*
188 * Clean up of epd->bio is handled by its endio function. The endio is
189 * either triggered by successful bio execution or by the error handler
190 * of the submit bio hook.
191 * So at this point, no matter what happened, we don't need to clean up
192 * epd->bio here.
193 */
194 epd->bio = NULL;
195 }
196 return ret;
197 }
198
199 int __init extent_io_init(void)
200 {
201 extent_state_cache = kmem_cache_create("btrfs_extent_state",
202 sizeof(struct extent_state), 0,
203 SLAB_MEM_SPREAD, NULL);
204 if (!extent_state_cache)
205 return -ENOMEM;
206
207 extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
208 sizeof(struct extent_buffer), 0,
209 SLAB_MEM_SPREAD, NULL);
210 if (!extent_buffer_cache)
211 goto free_state_cache;
212
213 if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
214 offsetof(struct btrfs_io_bio, bio),
215 BIOSET_NEED_BVECS))
216 goto free_buffer_cache;
217
218 if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
219 goto free_bioset;
220
221 return 0;
222
223 free_bioset:
224 bioset_exit(&btrfs_bioset);
225
226 free_buffer_cache:
227 kmem_cache_destroy(extent_buffer_cache);
228 extent_buffer_cache = NULL;
229
230 free_state_cache:
231 kmem_cache_destroy(extent_state_cache);
232 extent_state_cache = NULL;
233 return -ENOMEM;
234 }
235
236 void __cold extent_io_exit(void)
237 {
238 btrfs_leak_debug_check();
239
240 /*
241 * Make sure all delayed rcu free are flushed before we
242 * destroy the caches.
243 */
244 rcu_barrier();
245 kmem_cache_destroy(extent_state_cache);
246 kmem_cache_destroy(extent_buffer_cache);
247 bioset_exit(&btrfs_bioset);
248 }
249
250 void extent_io_tree_init(struct btrfs_fs_info *fs_info,
251 struct extent_io_tree *tree, unsigned int owner,
252 void *private_data)
253 {
254 tree->fs_info = fs_info;
255 tree->state = RB_ROOT;
256 tree->ops = NULL;
257 tree->dirty_bytes = 0;
258 spin_lock_init(&tree->lock);
259 tree->private_data = private_data;
260 tree->owner = owner;
261 }
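/*
 * Illustrative caller sketch (assumed usage, not part of this file): a data
 * inode typically wires up its main io_tree roughly as
 *
 *	extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
 *			    IO_TREE_INODE_IO, inode);
 *
 * where the owner id (here assumed to be IO_TREE_INODE_IO) only identifies
 * the tree for tracing and debugging.
 */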
262
263 void extent_io_tree_release(struct extent_io_tree *tree)
264 {
265 spin_lock(&tree->lock);
266 /*
267 * Do a single barrier for the waitqueue_active check here, the state
268 * of the waitqueue should not change once extent_io_tree_release is
269 * called.
270 */
271 smp_mb();
272 while (!RB_EMPTY_ROOT(&tree->state)) {
273 struct rb_node *node;
274 struct extent_state *state;
275
276 node = rb_first(&tree->state);
277 state = rb_entry(node, struct extent_state, rb_node);
278 rb_erase(&state->rb_node, &tree->state);
279 RB_CLEAR_NODE(&state->rb_node);
280 /*
281 * btree io trees aren't supposed to have tasks waiting for
282 * changes in the flags of extent states ever.
283 */
284 ASSERT(!waitqueue_active(&state->wq));
285 free_extent_state(state);
286
287 cond_resched_lock(&tree->lock);
288 }
289 spin_unlock(&tree->lock);
290 }
291
292 static struct extent_state *alloc_extent_state(gfp_t mask)
293 {
294 struct extent_state *state;
295
296 /*
297 * The given mask might not be appropriate for the slab allocator,
298 * drop the gfp flags that are not relevant to the slab allocator.
299 */
300 mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
301 state = kmem_cache_alloc(extent_state_cache, mask);
302 if (!state)
303 return state;
304 state->state = 0;
305 state->failrec = NULL;
306 RB_CLEAR_NODE(&state->rb_node);
307 btrfs_leak_debug_add(&state->leak_list, &states);
308 refcount_set(&state->refs, 1);
309 init_waitqueue_head(&state->wq);
310 trace_alloc_extent_state(state, mask, _RET_IP_);
311 return state;
312 }
313
314 void free_extent_state(struct extent_state *state)
315 {
316 if (!state)
317 return;
318 if (refcount_dec_and_test(&state->refs)) {
319 WARN_ON(extent_state_in_tree(state));
320 btrfs_leak_debug_del(&state->leak_list);
321 trace_free_extent_state(state, _RET_IP_);
322 kmem_cache_free(extent_state_cache, state);
323 }
324 }
325
326 static struct rb_node *tree_insert(struct rb_root *root,
327 struct rb_node *search_start,
328 u64 offset,
329 struct rb_node *node,
330 struct rb_node ***p_in,
331 struct rb_node **parent_in)
332 {
333 struct rb_node **p;
334 struct rb_node *parent = NULL;
335 struct tree_entry *entry;
336
337 if (p_in && parent_in) {
338 p = *p_in;
339 parent = *parent_in;
340 goto do_insert;
341 }
342
343 p = search_start ? &search_start : &root->rb_node;
344 while (*p) {
345 parent = *p;
346 entry = rb_entry(parent, struct tree_entry, rb_node);
347
348 if (offset < entry->start)
349 p = &(*p)->rb_left;
350 else if (offset > entry->end)
351 p = &(*p)->rb_right;
352 else
353 return parent;
354 }
355
356 do_insert:
357 rb_link_node(node, parent, p);
358 rb_insert_color(node, root);
359 return NULL;
360 }
361
362
363
364
365
366
367
368 /*
369 * __etree_search - search @tree for an entry that contains @offset; such
370 * an entry has entry->start <= offset && entry->end >= offset.
371 *
372 * If an entry containing @offset exists it is returned and the other out
373 * parameters are left untouched. Otherwise NULL is returned and, when the
374 * corresponding pointers are non-NULL, @next_ret is set to the first entry
375 * that ends at or after @offset, @prev_ret to the last entry that starts
376 * before @offset, and @p_ret/@parent_ret to the rb-tree link and parent
377 * where a new entry for @offset would be inserted.
378 */
379
380 static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
381 struct rb_node **next_ret,
382 struct rb_node **prev_ret,
383 struct rb_node ***p_ret,
384 struct rb_node **parent_ret)
385 {
386 struct rb_root *root = &tree->state;
387 struct rb_node **n = &root->rb_node;
388 struct rb_node *prev = NULL;
389 struct rb_node *orig_prev = NULL;
390 struct tree_entry *entry;
391 struct tree_entry *prev_entry = NULL;
392
393 while (*n) {
394 prev = *n;
395 entry = rb_entry(prev, struct tree_entry, rb_node);
396 prev_entry = entry;
397
398 if (offset < entry->start)
399 n = &(*n)->rb_left;
400 else if (offset > entry->end)
401 n = &(*n)->rb_right;
402 else
403 return *n;
404 }
405
406 if (p_ret)
407 *p_ret = n;
408 if (parent_ret)
409 *parent_ret = prev;
410
411 if (next_ret) {
412 orig_prev = prev;
413 while (prev && offset > prev_entry->end) {
414 prev = rb_next(prev);
415 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
416 }
417 *next_ret = prev;
418 prev = orig_prev;
419 }
420
421 if (prev_ret) {
422 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
423 while (prev && offset < prev_entry->start) {
424 prev = rb_prev(prev);
425 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
426 }
427 *prev_ret = prev;
428 }
429 return NULL;
430 }
431
432 static inline struct rb_node *
433 tree_search_for_insert(struct extent_io_tree *tree,
434 u64 offset,
435 struct rb_node ***p_ret,
436 struct rb_node **parent_ret)
437 {
438 struct rb_node *next = NULL;
439 struct rb_node *ret;
440
441 ret = __etree_search(tree, offset, &next, NULL, p_ret, parent_ret);
442 if (!ret)
443 return next;
444 return ret;
445 }
446
447 static inline struct rb_node *tree_search(struct extent_io_tree *tree,
448 u64 offset)
449 {
450 return tree_search_for_insert(tree, offset, NULL, NULL);
451 }
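/*
 * Note: tree_search() returns the entry containing @offset when one exists;
 * otherwise it returns the next entry after @offset (via
 * tree_search_for_insert), or NULL when @offset lies beyond the last entry.
 */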
452
453
454 /*
455 * Utility function to look for merge candidates inside a given range.
456 * Any extents with matching state are merged together into a single
457 * extent in the tree.
458 *
459 * Extents carrying EXTENT_LOCKED or EXTENT_BOUNDARY are never merged,
460 * since the end_io handlers need to operate on them without sleeping.
461 */
462 static void merge_state(struct extent_io_tree *tree,
463 struct extent_state *state)
464 {
465 struct extent_state *other;
466 struct rb_node *other_node;
467
468 if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
469 return;
470
471 other_node = rb_prev(&state->rb_node);
472 if (other_node) {
473 other = rb_entry(other_node, struct extent_state, rb_node);
474 if (other->end == state->start - 1 &&
475 other->state == state->state) {
476 if (tree->private_data &&
477 is_data_inode(tree->private_data))
478 btrfs_merge_delalloc_extent(tree->private_data,
479 state, other);
480 state->start = other->start;
481 rb_erase(&other->rb_node, &tree->state);
482 RB_CLEAR_NODE(&other->rb_node);
483 free_extent_state(other);
484 }
485 }
486 other_node = rb_next(&state->rb_node);
487 if (other_node) {
488 other = rb_entry(other_node, struct extent_state, rb_node);
489 if (other->start == state->end + 1 &&
490 other->state == state->state) {
491 if (tree->private_data &&
492 is_data_inode(tree->private_data))
493 btrfs_merge_delalloc_extent(tree->private_data,
494 state, other);
495 state->end = other->end;
496 rb_erase(&other->rb_node, &tree->state);
497 RB_CLEAR_NODE(&other->rb_node);
498 free_extent_state(other);
499 }
500 }
501 }
502
503 static void set_state_bits(struct extent_io_tree *tree,
504 struct extent_state *state, unsigned *bits,
505 struct extent_changeset *changeset);
506
507
508
509
510 /*
511 * Insert an extent_state struct into the tree. 'bits' are set on the
512 * struct before it is inserted.
513 *
514 * Returns -EEXIST if an overlapping extent is already in the tree; the
515 * tree lock is not taken internally, callers must hold it.
516 */
517 static int insert_state(struct extent_io_tree *tree,
518 struct extent_state *state, u64 start, u64 end,
519 struct rb_node ***p,
520 struct rb_node **parent,
521 unsigned *bits, struct extent_changeset *changeset)
522 {
523 struct rb_node *node;
524
525 if (end < start) {
526 btrfs_err(tree->fs_info,
527 "insert state: end < start %llu %llu", end, start);
528 WARN_ON(1);
529 }
530 state->start = start;
531 state->end = end;
532
533 set_state_bits(tree, state, bits, changeset);
534
535 node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
536 if (node) {
537 struct extent_state *found;
538 found = rb_entry(node, struct extent_state, rb_node);
539 btrfs_err(tree->fs_info,
540 "found node %llu %llu on insert of %llu %llu",
541 found->start, found->end, start, end);
542 return -EEXIST;
543 }
544 merge_state(tree, state);
545 return 0;
546 }
547
548
549
550
551
552 /*
553 * Split a given extent state struct in two, inserting the preallocated
554 * struct 'prealloc' as the newly created first half. 'split' is the
555 * offset inside 'orig' where it should be split.
556 *
557 * Before calling, the tree has 'orig' at [orig->start, orig->end]; after
558 * calling there are two states in the tree:
559 * prealloc: [orig->start, split - 1]
560 * orig: [split, orig->end]
561 */
562 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
563 struct extent_state *prealloc, u64 split)
564 {
565 struct rb_node *node;
566
567 if (tree->private_data && is_data_inode(tree->private_data))
568 btrfs_split_delalloc_extent(tree->private_data, orig, split);
569
570 prealloc->start = orig->start;
571 prealloc->end = split - 1;
572 prealloc->state = orig->state;
573 orig->start = split;
574
575 node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
576 &prealloc->rb_node, NULL, NULL);
577 if (node) {
578 free_extent_state(prealloc);
579 return -EEXIST;
580 }
581 return 0;
582 }
583
584 static struct extent_state *next_state(struct extent_state *state)
585 {
586 struct rb_node *next = rb_next(&state->rb_node);
587 if (next)
588 return rb_entry(next, struct extent_state, rb_node);
589 else
590 return NULL;
591 }
592
593 /*
594 * Utility function to clear some bits in an extent state struct. It
595 * will optionally wake up anyone waiting on this state (wake == 1).
596 *
597 * If no bits are set on the state struct after clearing things, the
598 * struct is freed and removed from the tree.
599 */
600 static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
601 struct extent_state *state,
602 unsigned *bits, int wake,
603 struct extent_changeset *changeset)
604 {
605 struct extent_state *next;
606 unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
607 int ret;
608
609 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
610 u64 range = state->end - state->start + 1;
611 WARN_ON(range > tree->dirty_bytes);
612 tree->dirty_bytes -= range;
613 }
614
615 if (tree->private_data && is_data_inode(tree->private_data))
616 btrfs_clear_delalloc_extent(tree->private_data, state, bits);
617
618 ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
619 BUG_ON(ret < 0);
620 state->state &= ~bits_to_clear;
621 if (wake)
622 wake_up(&state->wq);
623 if (state->state == 0) {
624 next = next_state(state);
625 if (extent_state_in_tree(state)) {
626 rb_erase(&state->rb_node, &tree->state);
627 RB_CLEAR_NODE(&state->rb_node);
628 free_extent_state(state);
629 } else {
630 WARN_ON(1);
631 }
632 } else {
633 merge_state(tree, state);
634 next = next_state(state);
635 }
636 return next;
637 }
638
639 static struct extent_state *
640 alloc_extent_state_atomic(struct extent_state *prealloc)
641 {
642 if (!prealloc)
643 prealloc = alloc_extent_state(GFP_ATOMIC);
644
645 return prealloc;
646 }
647
648 static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
649 {
650 struct inode *inode = tree->private_data;
651
652 btrfs_panic(btrfs_sb(inode->i_sb), err,
653 "locking error: extent tree was modified by another thread while locked");
654 }
655
656
657
658 /*
659 * Clear some bits on a range in the tree. This may require splitting or
660 * inserting elements in the tree, so the gfp mask is used to indicate
661 * which allocations or sleeping are allowed.
662 *
663 * Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove the
664 * given range from the tree regardless of state (ie for truncate).
665 *
666 * The range [start, end] is inclusive. This takes the tree lock.
667 */
668 int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
669 unsigned bits, int wake, int delete,
670 struct extent_state **cached_state,
671 gfp_t mask, struct extent_changeset *changeset)
672 {
673 struct extent_state *state;
674 struct extent_state *cached;
675 struct extent_state *prealloc = NULL;
676 struct rb_node *node;
677 u64 last_end;
678 int err;
679 int clear = 0;
680
681 btrfs_debug_check_extent_io_range(tree, start, end);
682 trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
683
684 if (bits & EXTENT_DELALLOC)
685 bits |= EXTENT_NORESERVE;
686
687 if (delete)
688 bits |= ~EXTENT_CTLBITS;
689
690 if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
691 clear = 1;
692 again:
693 if (!prealloc && gfpflags_allow_blocking(mask)) {
694
695
696
697
698
699
700
701 prealloc = alloc_extent_state(mask);
702 }
703
704 spin_lock(&tree->lock);
705 if (cached_state) {
706 cached = *cached_state;
707
708 if (clear) {
709 *cached_state = NULL;
710 cached_state = NULL;
711 }
712
713 if (cached && extent_state_in_tree(cached) &&
714 cached->start <= start && cached->end > start) {
715 if (clear)
716 refcount_dec(&cached->refs);
717 state = cached;
718 goto hit_next;
719 }
720 if (clear)
721 free_extent_state(cached);
722 }
723
724
725
726
727 node = tree_search(tree, start);
728 if (!node)
729 goto out;
730 state = rb_entry(node, struct extent_state, rb_node);
731 hit_next:
732 if (state->start > end)
733 goto out;
734 WARN_ON(state->end < start);
735 last_end = state->end;
736
737
738 if (!(state->state & bits)) {
739 state = next_state(state);
740 goto next;
741 }
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759 if (state->start < start) {
760 prealloc = alloc_extent_state_atomic(prealloc);
761 BUG_ON(!prealloc);
762 err = split_state(tree, state, prealloc, start);
763 if (err)
764 extent_io_tree_panic(tree, err);
765
766 prealloc = NULL;
767 if (err)
768 goto out;
769 if (state->end <= end) {
770 state = clear_state_bit(tree, state, &bits, wake,
771 changeset);
772 goto next;
773 }
774 goto search_again;
775 }
776
777
778
779
780
781
782 if (state->start <= end && state->end > end) {
783 prealloc = alloc_extent_state_atomic(prealloc);
784 BUG_ON(!prealloc);
785 err = split_state(tree, state, prealloc, end + 1);
786 if (err)
787 extent_io_tree_panic(tree, err);
788
789 if (wake)
790 wake_up(&state->wq);
791
792 clear_state_bit(tree, prealloc, &bits, wake, changeset);
793
794 prealloc = NULL;
795 goto out;
796 }
797
798 state = clear_state_bit(tree, state, &bits, wake, changeset);
799 next:
800 if (last_end == (u64)-1)
801 goto out;
802 start = last_end + 1;
803 if (start <= end && state && !need_resched())
804 goto hit_next;
805
806 search_again:
807 if (start > end)
808 goto out;
809 spin_unlock(&tree->lock);
810 if (gfpflags_allow_blocking(mask))
811 cond_resched();
812 goto again;
813
814 out:
815 spin_unlock(&tree->lock);
816 if (prealloc)
817 free_extent_state(prealloc);
818
819 return 0;
820
821 }
822
823 static void wait_on_state(struct extent_io_tree *tree,
824 struct extent_state *state)
825 __releases(tree->lock)
826 __acquires(tree->lock)
827 {
828 DEFINE_WAIT(wait);
829 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
830 spin_unlock(&tree->lock);
831 schedule();
832 spin_lock(&tree->lock);
833 finish_wait(&state->wq, &wait);
834 }
835
836 /*
837 * Waits for one or more bits to clear on a range in the state tree.
838 * The range [start, end] is inclusive.
839 * The tree lock is taken by this function.
840 */
841 static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
842 unsigned long bits)
843 {
844 struct extent_state *state;
845 struct rb_node *node;
846
847 btrfs_debug_check_extent_io_range(tree, start, end);
848
849 spin_lock(&tree->lock);
850 again:
851 while (1) {
852
853
854
855
856 node = tree_search(tree, start);
857 process_node:
858 if (!node)
859 break;
860
861 state = rb_entry(node, struct extent_state, rb_node);
862
863 if (state->start > end)
864 goto out;
865
866 if (state->state & bits) {
867 start = state->start;
868 refcount_inc(&state->refs);
869 wait_on_state(tree, state);
870 free_extent_state(state);
871 goto again;
872 }
873 start = state->end + 1;
874
875 if (start > end)
876 break;
877
878 if (!cond_resched_lock(&tree->lock)) {
879 node = rb_next(node);
880 goto process_node;
881 }
882 }
883 out:
884 spin_unlock(&tree->lock);
885 }
886
887 static void set_state_bits(struct extent_io_tree *tree,
888 struct extent_state *state,
889 unsigned *bits, struct extent_changeset *changeset)
890 {
891 unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
892 int ret;
893
894 if (tree->private_data && is_data_inode(tree->private_data))
895 btrfs_set_delalloc_extent(tree->private_data, state, bits);
896
897 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
898 u64 range = state->end - state->start + 1;
899 tree->dirty_bytes += range;
900 }
901 ret = add_extent_changeset(state, bits_to_set, changeset, 1);
902 BUG_ON(ret < 0);
903 state->state |= bits_to_set;
904 }
905
906 static void cache_state_if_flags(struct extent_state *state,
907 struct extent_state **cached_ptr,
908 unsigned flags)
909 {
910 if (cached_ptr && !(*cached_ptr)) {
911 if (!flags || (state->state & flags)) {
912 *cached_ptr = state;
913 refcount_inc(&state->refs);
914 }
915 }
916 }
917
918 static void cache_state(struct extent_state *state,
919 struct extent_state **cached_ptr)
920 {
921 return cache_state_if_flags(state, cached_ptr,
922 EXTENT_LOCKED | EXTENT_BOUNDARY);
923 }
924
925
926 /*
927 * Set some bits on a range in the tree. This may require allocations or
928 * sleeping, so the gfp mask is used to indicate what is allowed.
929 *
930 * If any of the exclusive bits are set, this will fail with -EEXIST if
931 * some part of the range already has the desired bits set. The start of
932 * the existing range is returned in failed_start in this case.
933 *
934 * [start, end] is inclusive. This takes the tree lock.
935 */
936 static int __must_check
937 __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
938 unsigned bits, unsigned exclusive_bits,
939 u64 *failed_start, struct extent_state **cached_state,
940 gfp_t mask, struct extent_changeset *changeset)
941 {
942 struct extent_state *state;
943 struct extent_state *prealloc = NULL;
944 struct rb_node *node;
945 struct rb_node **p;
946 struct rb_node *parent;
947 int err = 0;
948 u64 last_start;
949 u64 last_end;
950
951 btrfs_debug_check_extent_io_range(tree, start, end);
952 trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
953
954 again:
955 if (!prealloc && gfpflags_allow_blocking(mask)) {
956
957
958
959
960
961
962
963 prealloc = alloc_extent_state(mask);
964 }
965
966 spin_lock(&tree->lock);
967 if (cached_state && *cached_state) {
968 state = *cached_state;
969 if (state->start <= start && state->end > start &&
970 extent_state_in_tree(state)) {
971 node = &state->rb_node;
972 goto hit_next;
973 }
974 }
975
976
977
978
979 node = tree_search_for_insert(tree, start, &p, &parent);
980 if (!node) {
981 prealloc = alloc_extent_state_atomic(prealloc);
982 BUG_ON(!prealloc);
983 err = insert_state(tree, prealloc, start, end,
984 &p, &parent, &bits, changeset);
985 if (err)
986 extent_io_tree_panic(tree, err);
987
988 cache_state(prealloc, cached_state);
989 prealloc = NULL;
990 goto out;
991 }
992 state = rb_entry(node, struct extent_state, rb_node);
993 hit_next:
994 last_start = state->start;
995 last_end = state->end;
996
997
998
999
1000
1001
1002
1003 if (state->start == start && state->end <= end) {
1004 if (state->state & exclusive_bits) {
1005 *failed_start = state->start;
1006 err = -EEXIST;
1007 goto out;
1008 }
1009
1010 set_state_bits(tree, state, &bits, changeset);
1011 cache_state(state, cached_state);
1012 merge_state(tree, state);
1013 if (last_end == (u64)-1)
1014 goto out;
1015 start = last_end + 1;
1016 state = next_state(state);
1017 if (start < end && state && state->start == start &&
1018 !need_resched())
1019 goto hit_next;
1020 goto search_again;
1021 }
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039 if (state->start < start) {
1040 if (state->state & exclusive_bits) {
1041 *failed_start = start;
1042 err = -EEXIST;
1043 goto out;
1044 }
1045
1046 prealloc = alloc_extent_state_atomic(prealloc);
1047 BUG_ON(!prealloc);
1048 err = split_state(tree, state, prealloc, start);
1049 if (err)
1050 extent_io_tree_panic(tree, err);
1051
1052 prealloc = NULL;
1053 if (err)
1054 goto out;
1055 if (state->end <= end) {
1056 set_state_bits(tree, state, &bits, changeset);
1057 cache_state(state, cached_state);
1058 merge_state(tree, state);
1059 if (last_end == (u64)-1)
1060 goto out;
1061 start = last_end + 1;
1062 state = next_state(state);
1063 if (start < end && state && state->start == start &&
1064 !need_resched())
1065 goto hit_next;
1066 }
1067 goto search_again;
1068 }
1069
1070
1071
1072
1073
1074
1075
1076 if (state->start > start) {
1077 u64 this_end;
1078 if (end < last_start)
1079 this_end = end;
1080 else
1081 this_end = last_start - 1;
1082
1083 prealloc = alloc_extent_state_atomic(prealloc);
1084 BUG_ON(!prealloc);
1085
1086
1087
1088
1089
1090 err = insert_state(tree, prealloc, start, this_end,
1091 NULL, NULL, &bits, changeset);
1092 if (err)
1093 extent_io_tree_panic(tree, err);
1094
1095 cache_state(prealloc, cached_state);
1096 prealloc = NULL;
1097 start = this_end + 1;
1098 goto search_again;
1099 }
1100
1101
1102
1103
1104
1105
1106 if (state->start <= end && state->end > end) {
1107 if (state->state & exclusive_bits) {
1108 *failed_start = start;
1109 err = -EEXIST;
1110 goto out;
1111 }
1112
1113 prealloc = alloc_extent_state_atomic(prealloc);
1114 BUG_ON(!prealloc);
1115 err = split_state(tree, state, prealloc, end + 1);
1116 if (err)
1117 extent_io_tree_panic(tree, err);
1118
1119 set_state_bits(tree, prealloc, &bits, changeset);
1120 cache_state(prealloc, cached_state);
1121 merge_state(tree, prealloc);
1122 prealloc = NULL;
1123 goto out;
1124 }
1125
1126 search_again:
1127 if (start > end)
1128 goto out;
1129 spin_unlock(&tree->lock);
1130 if (gfpflags_allow_blocking(mask))
1131 cond_resched();
1132 goto again;
1133
1134 out:
1135 spin_unlock(&tree->lock);
1136 if (prealloc)
1137 free_extent_state(prealloc);
1138
1139 return err;
1140
1141 }
1142
1143 int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1144 unsigned bits, u64 *failed_start,
1145 struct extent_state **cached_state, gfp_t mask)
1146 {
1147 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
1148 cached_state, mask, NULL);
1149 }
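/*
 * Typical use (sketch): callers mark a byte range with a state bit, e.g.
 *
 *	set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL, &cached,
 *		       GFP_NOFS);
 *
 * The convenience wrappers in the header (set_extent_dirty() and friends)
 * boil down to calls of this form.
 */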
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1171 unsigned bits, unsigned clear_bits,
1172 struct extent_state **cached_state)
1173 {
1174 struct extent_state *state;
1175 struct extent_state *prealloc = NULL;
1176 struct rb_node *node;
1177 struct rb_node **p;
1178 struct rb_node *parent;
1179 int err = 0;
1180 u64 last_start;
1181 u64 last_end;
1182 bool first_iteration = true;
1183
1184 btrfs_debug_check_extent_io_range(tree, start, end);
1185 trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
1186 clear_bits);
1187
1188 again:
1189 if (!prealloc) {
1190
1191
1192
1193
1194
1195
1196
1197 prealloc = alloc_extent_state(GFP_NOFS);
1198 if (!prealloc && !first_iteration)
1199 return -ENOMEM;
1200 }
1201
1202 spin_lock(&tree->lock);
1203 if (cached_state && *cached_state) {
1204 state = *cached_state;
1205 if (state->start <= start && state->end > start &&
1206 extent_state_in_tree(state)) {
1207 node = &state->rb_node;
1208 goto hit_next;
1209 }
1210 }
1211
1212
1213
1214
1215
1216 node = tree_search_for_insert(tree, start, &p, &parent);
1217 if (!node) {
1218 prealloc = alloc_extent_state_atomic(prealloc);
1219 if (!prealloc) {
1220 err = -ENOMEM;
1221 goto out;
1222 }
1223 err = insert_state(tree, prealloc, start, end,
1224 &p, &parent, &bits, NULL);
1225 if (err)
1226 extent_io_tree_panic(tree, err);
1227 cache_state(prealloc, cached_state);
1228 prealloc = NULL;
1229 goto out;
1230 }
1231 state = rb_entry(node, struct extent_state, rb_node);
1232 hit_next:
1233 last_start = state->start;
1234 last_end = state->end;
1235
1236
1237
1238
1239
1240
1241
1242 if (state->start == start && state->end <= end) {
1243 set_state_bits(tree, state, &bits, NULL);
1244 cache_state(state, cached_state);
1245 state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
1246 if (last_end == (u64)-1)
1247 goto out;
1248 start = last_end + 1;
1249 if (start < end && state && state->start == start &&
1250 !need_resched())
1251 goto hit_next;
1252 goto search_again;
1253 }
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271 if (state->start < start) {
1272 prealloc = alloc_extent_state_atomic(prealloc);
1273 if (!prealloc) {
1274 err = -ENOMEM;
1275 goto out;
1276 }
1277 err = split_state(tree, state, prealloc, start);
1278 if (err)
1279 extent_io_tree_panic(tree, err);
1280 prealloc = NULL;
1281 if (err)
1282 goto out;
1283 if (state->end <= end) {
1284 set_state_bits(tree, state, &bits, NULL);
1285 cache_state(state, cached_state);
1286 state = clear_state_bit(tree, state, &clear_bits, 0,
1287 NULL);
1288 if (last_end == (u64)-1)
1289 goto out;
1290 start = last_end + 1;
1291 if (start < end && state && state->start == start &&
1292 !need_resched())
1293 goto hit_next;
1294 }
1295 goto search_again;
1296 }
1297
1298
1299
1300
1301
1302
1303
1304 if (state->start > start) {
1305 u64 this_end;
1306 if (end < last_start)
1307 this_end = end;
1308 else
1309 this_end = last_start - 1;
1310
1311 prealloc = alloc_extent_state_atomic(prealloc);
1312 if (!prealloc) {
1313 err = -ENOMEM;
1314 goto out;
1315 }
1316
1317
1318
1319
1320
1321 err = insert_state(tree, prealloc, start, this_end,
1322 NULL, NULL, &bits, NULL);
1323 if (err)
1324 extent_io_tree_panic(tree, err);
1325 cache_state(prealloc, cached_state);
1326 prealloc = NULL;
1327 start = this_end + 1;
1328 goto search_again;
1329 }
1330
1331
1332
1333
1334
1335
1336 if (state->start <= end && state->end > end) {
1337 prealloc = alloc_extent_state_atomic(prealloc);
1338 if (!prealloc) {
1339 err = -ENOMEM;
1340 goto out;
1341 }
1342
1343 err = split_state(tree, state, prealloc, end + 1);
1344 if (err)
1345 extent_io_tree_panic(tree, err);
1346
1347 set_state_bits(tree, prealloc, &bits, NULL);
1348 cache_state(prealloc, cached_state);
1349 clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
1350 prealloc = NULL;
1351 goto out;
1352 }
1353
1354 search_again:
1355 if (start > end)
1356 goto out;
1357 spin_unlock(&tree->lock);
1358 cond_resched();
1359 first_iteration = false;
1360 goto again;
1361
1362 out:
1363 spin_unlock(&tree->lock);
1364 if (prealloc)
1365 free_extent_state(prealloc);
1366
1367 return err;
1368 }
1369
1370
1371 int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1372 unsigned bits, struct extent_changeset *changeset)
1373 {
1374
1375
1376
1377
1378
1379
1380 BUG_ON(bits & EXTENT_LOCKED);
1381
1382 return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
1383 changeset);
1384 }
1385
1386 int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
1387 unsigned bits)
1388 {
1389 return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL,
1390 GFP_NOWAIT, NULL);
1391 }
1392
1393 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1394 unsigned bits, int wake, int delete,
1395 struct extent_state **cached)
1396 {
1397 return __clear_extent_bit(tree, start, end, bits, wake, delete,
1398 cached, GFP_NOFS, NULL);
1399 }
1400
1401 int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1402 unsigned bits, struct extent_changeset *changeset)
1403 {
1404
1405
1406
1407
1408 BUG_ON(bits & EXTENT_LOCKED);
1409
1410 return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
1411 changeset);
1412 }
1413
1414
1415
1416
1417
1418 int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1419 struct extent_state **cached_state)
1420 {
1421 int err;
1422 u64 failed_start;
1423
1424 while (1) {
1425 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
1426 EXTENT_LOCKED, &failed_start,
1427 cached_state, GFP_NOFS, NULL);
1428 if (err == -EEXIST) {
1429 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1430 start = failed_start;
1431 } else
1432 break;
1433 WARN_ON(start > end);
1434 }
1435 return err;
1436 }
1437
1438 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1439 {
1440 int err;
1441 u64 failed_start;
1442
1443 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1444 &failed_start, NULL, GFP_NOFS, NULL);
1445 if (err == -EEXIST) {
1446 if (failed_start > start)
1447 clear_extent_bit(tree, start, failed_start - 1,
1448 EXTENT_LOCKED, 1, 0, NULL);
1449 return 0;
1450 }
1451 return 1;
1452 }
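/*
 * Locking sketch: a typical caller locks a range, does its work on it, then
 * unlocks with the cached state to avoid a second tree lookup:
 *
 *	lock_extent_bits(tree, start, end, &cached_state);
 *	... operate on [start, end] ...
 *	unlock_extent_cached(tree, start, end, &cached_state);
 *
 * try_lock_extent() is the non-blocking variant: it returns 1 on success and
 * 0 if any part of the range was already locked.
 */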
1453
1454 void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1455 {
1456 unsigned long index = start >> PAGE_SHIFT;
1457 unsigned long end_index = end >> PAGE_SHIFT;
1458 struct page *page;
1459
1460 while (index <= end_index) {
1461 page = find_get_page(inode->i_mapping, index);
1462 BUG_ON(!page);
1463 clear_page_dirty_for_io(page);
1464 put_page(page);
1465 index++;
1466 }
1467 }
1468
1469 void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1470 {
1471 unsigned long index = start >> PAGE_SHIFT;
1472 unsigned long end_index = end >> PAGE_SHIFT;
1473 struct page *page;
1474
1475 while (index <= end_index) {
1476 page = find_get_page(inode->i_mapping, index);
1477 BUG_ON(!page);
1478 __set_page_dirty_nobuffers(page);
1479 account_page_redirty(page);
1480 put_page(page);
1481 index++;
1482 }
1483 }
1484
1485 /*
1486 * Find the first state struct with 'bits' set after 'start' and return
1487 * it. tree->lock must be held; NULL is returned if nothing is found.
1488 */
1489 static struct extent_state *
1490 find_first_extent_bit_state(struct extent_io_tree *tree,
1491 u64 start, unsigned bits)
1492 {
1493 struct rb_node *node;
1494 struct extent_state *state;
1495
1496
1497
1498
1499
1500 node = tree_search(tree, start);
1501 if (!node)
1502 goto out;
1503
1504 while (1) {
1505 state = rb_entry(node, struct extent_state, rb_node);
1506 if (state->end >= start && (state->state & bits))
1507 return state;
1508
1509 node = rb_next(node);
1510 if (!node)
1511 break;
1512 }
1513 out:
1514 return NULL;
1515 }
1516
1517 /*
1518 * Find the first offset in the io tree with one of the 'bits' set.
1519 * *start_ret and *end_ret are set to reflect the state struct that was
1520 * found.
1521 *
1522 * Returns 0 if something was found, 1 otherwise.
1523 */
1524 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1525 u64 *start_ret, u64 *end_ret, unsigned bits,
1526 struct extent_state **cached_state)
1527 {
1528 struct extent_state *state;
1529 int ret = 1;
1530
1531 spin_lock(&tree->lock);
1532 if (cached_state && *cached_state) {
1533 state = *cached_state;
1534 if (state->end == start - 1 && extent_state_in_tree(state)) {
1535 while ((state = next_state(state)) != NULL) {
1536 if (state->state & bits)
1537 goto got_it;
1538 }
1539 free_extent_state(*cached_state);
1540 *cached_state = NULL;
1541 goto out;
1542 }
1543 free_extent_state(*cached_state);
1544 *cached_state = NULL;
1545 }
1546
1547 state = find_first_extent_bit_state(tree, start, bits);
1548 got_it:
1549 if (state) {
1550 cache_state_if_flags(state, cached_state, 0);
1551 *start_ret = state->start;
1552 *end_ret = state->end;
1553 ret = 0;
1554 }
1555 out:
1556 spin_unlock(&tree->lock);
1557 return ret;
1558 }
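/*
 * Iteration sketch: all ranges with a given bit set can be scanned by
 * repeatedly advancing past the returned range, e.g.
 *
 *	while (!find_first_extent_bit(tree, start, &found_start, &found_end,
 *				      EXTENT_DIRTY, NULL)) {
 *		... process [found_start, found_end] ...
 *		start = found_end + 1;
 *	}
 */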
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575 void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
1576 u64 *start_ret, u64 *end_ret, unsigned bits)
1577 {
1578 struct extent_state *state;
1579 struct rb_node *node, *prev = NULL, *next;
1580
1581 spin_lock(&tree->lock);
1582
1583
1584 while (1) {
1585 node = __etree_search(tree, start, &next, &prev, NULL, NULL);
1586 if (!node && !next && !prev) {
1587
1588
1589
1590
1591 *start_ret = 0;
1592 *end_ret = -1;
1593 goto out;
1594 } else if (!node && !next) {
1595
1596
1597
1598
1599 state = rb_entry(prev, struct extent_state, rb_node);
1600 *start_ret = state->end + 1;
1601 *end_ret = -1;
1602 goto out;
1603 } else if (!node) {
1604 node = next;
1605 }
1606
1607
1608
1609
1610 state = rb_entry(node, struct extent_state, rb_node);
1611
1612 if (in_range(start, state->start, state->end - state->start + 1)) {
1613 if (state->state & bits) {
1614
1615
1616
1617
1618
1619 start = state->end + 1;
1620 } else {
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630 *start_ret = state->start;
1631 break;
1632 }
1633 } else {
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645 if (prev) {
1646 state = rb_entry(prev, struct extent_state,
1647 rb_node);
1648 *start_ret = state->end + 1;
1649 } else {
1650 *start_ret = 0;
1651 }
1652 break;
1653 }
1654 }
1655
1656
1657
1658
1659
1660 while (1) {
1661 state = rb_entry(node, struct extent_state, rb_node);
1662 if (state->end >= start && !(state->state & bits)) {
1663 *end_ret = state->end;
1664 } else {
1665 *end_ret = state->start - 1;
1666 break;
1667 }
1668
1669 node = rb_next(node);
1670 if (!node)
1671 break;
1672 }
1673 out:
1674 spin_unlock(&tree->lock);
1675 }
1676
1677
1678 /*
1679 * Find a contiguous range of bytes in the file marked as delalloc, not
1680 * more than 'max_bytes'; start and end are used to return the range.
1681 * true is returned if we find something, false if nothing was in the tree.
1682 */
1683 static noinline bool find_delalloc_range(struct extent_io_tree *tree,
1684 u64 *start, u64 *end, u64 max_bytes,
1685 struct extent_state **cached_state)
1686 {
1687 struct rb_node *node;
1688 struct extent_state *state;
1689 u64 cur_start = *start;
1690 bool found = false;
1691 u64 total_bytes = 0;
1692
1693 spin_lock(&tree->lock);
1694
1695
1696
1697
1698
1699 node = tree_search(tree, cur_start);
1700 if (!node) {
1701 *end = (u64)-1;
1702 goto out;
1703 }
1704
1705 while (1) {
1706 state = rb_entry(node, struct extent_state, rb_node);
1707 if (found && (state->start != cur_start ||
1708 (state->state & EXTENT_BOUNDARY))) {
1709 goto out;
1710 }
1711 if (!(state->state & EXTENT_DELALLOC)) {
1712 if (!found)
1713 *end = state->end;
1714 goto out;
1715 }
1716 if (!found) {
1717 *start = state->start;
1718 *cached_state = state;
1719 refcount_inc(&state->refs);
1720 }
1721 found = true;
1722 *end = state->end;
1723 cur_start = state->end + 1;
1724 node = rb_next(node);
1725 total_bytes += state->end - state->start + 1;
1726 if (total_bytes >= max_bytes)
1727 break;
1728 if (!node)
1729 break;
1730 }
1731 out:
1732 spin_unlock(&tree->lock);
1733 return found;
1734 }
1735
1736 static int __process_pages_contig(struct address_space *mapping,
1737 struct page *locked_page,
1738 pgoff_t start_index, pgoff_t end_index,
1739 unsigned long page_ops, pgoff_t *index_ret);
1740
1741 static noinline void __unlock_for_delalloc(struct inode *inode,
1742 struct page *locked_page,
1743 u64 start, u64 end)
1744 {
1745 unsigned long index = start >> PAGE_SHIFT;
1746 unsigned long end_index = end >> PAGE_SHIFT;
1747
1748 ASSERT(locked_page);
1749 if (index == locked_page->index && end_index == index)
1750 return;
1751
1752 __process_pages_contig(inode->i_mapping, locked_page, index, end_index,
1753 PAGE_UNLOCK, NULL);
1754 }
1755
1756 static noinline int lock_delalloc_pages(struct inode *inode,
1757 struct page *locked_page,
1758 u64 delalloc_start,
1759 u64 delalloc_end)
1760 {
1761 unsigned long index = delalloc_start >> PAGE_SHIFT;
1762 unsigned long index_ret = index;
1763 unsigned long end_index = delalloc_end >> PAGE_SHIFT;
1764 int ret;
1765
1766 ASSERT(locked_page);
1767 if (index == locked_page->index && index == end_index)
1768 return 0;
1769
1770 ret = __process_pages_contig(inode->i_mapping, locked_page, index,
1771 end_index, PAGE_LOCK, &index_ret);
1772 if (ret == -EAGAIN)
1773 __unlock_for_delalloc(inode, locked_page, delalloc_start,
1774 (u64)index_ret << PAGE_SHIFT);
1775 return ret;
1776 }
1777
1778
1779
1780
1781
1782
1783
1784
1785 EXPORT_FOR_TESTS
1786 noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
1787 struct page *locked_page, u64 *start,
1788 u64 *end)
1789 {
1790 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
1791 u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
1792 u64 delalloc_start;
1793 u64 delalloc_end;
1794 bool found;
1795 struct extent_state *cached_state = NULL;
1796 int ret;
1797 int loops = 0;
1798
1799 again:
1800
1801 delalloc_start = *start;
1802 delalloc_end = 0;
1803 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1804 max_bytes, &cached_state);
1805 if (!found || delalloc_end <= *start) {
1806 *start = delalloc_start;
1807 *end = delalloc_end;
1808 free_extent_state(cached_state);
1809 return false;
1810 }
1811
1812
1813
1814
1815
1816
1817 if (delalloc_start < *start)
1818 delalloc_start = *start;
1819
1820
1821
1822
1823 if (delalloc_end + 1 - delalloc_start > max_bytes)
1824 delalloc_end = delalloc_start + max_bytes - 1;
1825
1826
1827 ret = lock_delalloc_pages(inode, locked_page,
1828 delalloc_start, delalloc_end);
1829 ASSERT(!ret || ret == -EAGAIN);
1830 if (ret == -EAGAIN) {
1831
1832
1833
1834 free_extent_state(cached_state);
1835 cached_state = NULL;
1836 if (!loops) {
1837 max_bytes = PAGE_SIZE;
1838 loops = 1;
1839 goto again;
1840 } else {
1841 found = false;
1842 goto out_failed;
1843 }
1844 }
1845
1846
1847 lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
1848
1849
1850 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1851 EXTENT_DELALLOC, 1, cached_state);
1852 if (!ret) {
1853 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1854 &cached_state);
1855 __unlock_for_delalloc(inode, locked_page,
1856 delalloc_start, delalloc_end);
1857 cond_resched();
1858 goto again;
1859 }
1860 free_extent_state(cached_state);
1861 *start = delalloc_start;
1862 *end = delalloc_end;
1863 out_failed:
1864 return found;
1865 }
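/*
 * find_lock_delalloc_range() is the workhorse of writepage_delalloc(): it
 * finds a delalloc byte range overlapping the locked page, locks every page
 * in that range plus the extent range itself, and returns the final range
 * through *start and *end. On -EAGAIN from lock_delalloc_pages() it shrinks
 * the search to a single page and retries once before giving up.
 */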
1866
1867 static int __process_pages_contig(struct address_space *mapping,
1868 struct page *locked_page,
1869 pgoff_t start_index, pgoff_t end_index,
1870 unsigned long page_ops, pgoff_t *index_ret)
1871 {
1872 unsigned long nr_pages = end_index - start_index + 1;
1873 unsigned long pages_locked = 0;
1874 pgoff_t index = start_index;
1875 struct page *pages[16];
1876 unsigned ret;
1877 int err = 0;
1878 int i;
1879
1880 if (page_ops & PAGE_LOCK) {
1881 ASSERT(page_ops == PAGE_LOCK);
1882 ASSERT(index_ret && *index_ret == start_index);
1883 }
1884
1885 if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
1886 mapping_set_error(mapping, -EIO);
1887
1888 while (nr_pages > 0) {
1889 ret = find_get_pages_contig(mapping, index,
1890 min_t(unsigned long,
1891 nr_pages, ARRAY_SIZE(pages)), pages);
1892 if (ret == 0) {
1893
1894
1895
1896
1897 ASSERT(page_ops & PAGE_LOCK);
1898 err = -EAGAIN;
1899 goto out;
1900 }
1901
1902 for (i = 0; i < ret; i++) {
1903 if (page_ops & PAGE_SET_PRIVATE2)
1904 SetPagePrivate2(pages[i]);
1905
1906 if (locked_page && pages[i] == locked_page) {
1907 put_page(pages[i]);
1908 pages_locked++;
1909 continue;
1910 }
1911 if (page_ops & PAGE_CLEAR_DIRTY)
1912 clear_page_dirty_for_io(pages[i]);
1913 if (page_ops & PAGE_SET_WRITEBACK)
1914 set_page_writeback(pages[i]);
1915 if (page_ops & PAGE_SET_ERROR)
1916 SetPageError(pages[i]);
1917 if (page_ops & PAGE_END_WRITEBACK)
1918 end_page_writeback(pages[i]);
1919 if (page_ops & PAGE_UNLOCK)
1920 unlock_page(pages[i]);
1921 if (page_ops & PAGE_LOCK) {
1922 lock_page(pages[i]);
1923 if (!PageDirty(pages[i]) ||
1924 pages[i]->mapping != mapping) {
1925 unlock_page(pages[i]);
1926 put_page(pages[i]);
1927 err = -EAGAIN;
1928 goto out;
1929 }
1930 }
1931 put_page(pages[i]);
1932 pages_locked++;
1933 }
1934 nr_pages -= ret;
1935 index += ret;
1936 cond_resched();
1937 }
1938 out:
1939 if (err && index_ret)
1940 *index_ret = start_index + pages_locked - 1;
1941 return err;
1942 }
1943
1944 void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1945 struct page *locked_page,
1946 unsigned clear_bits,
1947 unsigned long page_ops)
1948 {
1949 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
1950 NULL);
1951
1952 __process_pages_contig(inode->i_mapping, locked_page,
1953 start >> PAGE_SHIFT, end >> PAGE_SHIFT,
1954 page_ops, NULL);
1955 }
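/*
 * extent_clear_unlock_delalloc() combines both steps above: it clears
 * @clear_bits on the inode's io_tree for [start, end] and then applies
 * @page_ops (unlock, end writeback, set error, ...) to every page in the
 * range via __process_pages_contig().
 */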
1956
1957
1958
1959
1960
1961
1962 u64 count_range_bits(struct extent_io_tree *tree,
1963 u64 *start, u64 search_end, u64 max_bytes,
1964 unsigned bits, int contig)
1965 {
1966 struct rb_node *node;
1967 struct extent_state *state;
1968 u64 cur_start = *start;
1969 u64 total_bytes = 0;
1970 u64 last = 0;
1971 int found = 0;
1972
1973 if (WARN_ON(search_end <= cur_start))
1974 return 0;
1975
1976 spin_lock(&tree->lock);
1977 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1978 total_bytes = tree->dirty_bytes;
1979 goto out;
1980 }
1981
1982
1983
1984
1985 node = tree_search(tree, cur_start);
1986 if (!node)
1987 goto out;
1988
1989 while (1) {
1990 state = rb_entry(node, struct extent_state, rb_node);
1991 if (state->start > search_end)
1992 break;
1993 if (contig && found && state->start > last + 1)
1994 break;
1995 if (state->end >= cur_start && (state->state & bits) == bits) {
1996 total_bytes += min(search_end, state->end) + 1 -
1997 max(cur_start, state->start);
1998 if (total_bytes >= max_bytes)
1999 break;
2000 if (!found) {
2001 *start = max(cur_start, state->start);
2002 found = 1;
2003 }
2004 last = state->end;
2005 } else if (contig && found) {
2006 break;
2007 }
2008 node = rb_next(node);
2009 if (!node)
2010 break;
2011 }
2012 out:
2013 spin_unlock(&tree->lock);
2014 return total_bytes;
2015 }
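/*
 * Usage sketch (illustrative, 'isize' assumed from the caller): counting how
 * many bytes in a range carry a bit, e.g. delalloc bytes from offset 0:
 *
 *	u64 found = 0;
 *	u64 bytes = count_range_bits(tree, &found, isize - 1, (u64)-1,
 *				     EXTENT_DELALLOC, 0);
 *
 * With 'contig' set, counting stops at the first gap between matching ranges.
 */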
2016
2017
2018
2019
2020
2021 static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
2022 struct io_failure_record *failrec)
2023 {
2024 struct rb_node *node;
2025 struct extent_state *state;
2026 int ret = 0;
2027
2028 spin_lock(&tree->lock);
2029
2030
2031
2032
2033 node = tree_search(tree, start);
2034 if (!node) {
2035 ret = -ENOENT;
2036 goto out;
2037 }
2038 state = rb_entry(node, struct extent_state, rb_node);
2039 if (state->start != start) {
2040 ret = -ENOENT;
2041 goto out;
2042 }
2043 state->failrec = failrec;
2044 out:
2045 spin_unlock(&tree->lock);
2046 return ret;
2047 }
2048
2049 static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
2050 struct io_failure_record **failrec)
2051 {
2052 struct rb_node *node;
2053 struct extent_state *state;
2054 int ret = 0;
2055
2056 spin_lock(&tree->lock);
2057
2058
2059
2060
2061 node = tree_search(tree, start);
2062 if (!node) {
2063 ret = -ENOENT;
2064 goto out;
2065 }
2066 state = rb_entry(node, struct extent_state, rb_node);
2067 if (state->start != start) {
2068 ret = -ENOENT;
2069 goto out;
2070 }
2071 *failrec = state->failrec;
2072 out:
2073 spin_unlock(&tree->lock);
2074 return ret;
2075 }
2076
2077
2078
2079
2080
2081
2082
2083 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
2084 unsigned bits, int filled, struct extent_state *cached)
2085 {
2086 struct extent_state *state = NULL;
2087 struct rb_node *node;
2088 int bitset = 0;
2089
2090 spin_lock(&tree->lock);
2091 if (cached && extent_state_in_tree(cached) && cached->start <= start &&
2092 cached->end > start)
2093 node = &cached->rb_node;
2094 else
2095 node = tree_search(tree, start);
2096 while (node && start <= end) {
2097 state = rb_entry(node, struct extent_state, rb_node);
2098
2099 if (filled && state->start > start) {
2100 bitset = 0;
2101 break;
2102 }
2103
2104 if (state->start > end)
2105 break;
2106
2107 if (state->state & bits) {
2108 bitset = 1;
2109 if (!filled)
2110 break;
2111 } else if (filled) {
2112 bitset = 0;
2113 break;
2114 }
2115
2116 if (state->end == (u64)-1)
2117 break;
2118
2119 start = state->end + 1;
2120 if (start > end)
2121 break;
2122 node = rb_next(node);
2123 if (!node) {
2124 if (filled)
2125 bitset = 0;
2126 break;
2127 }
2128 }
2129 spin_unlock(&tree->lock);
2130 return bitset;
2131 }
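/*
 * Note on 'filled': with filled == 1 the whole [start, end] range must be
 * covered by states that all have the requested bits set; with filled == 0
 * it is enough that any part of the range has one of the bits set.
 */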
2132
2133
2134
2135
2136
2137 static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
2138 {
2139 u64 start = page_offset(page);
2140 u64 end = start + PAGE_SIZE - 1;
2141 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
2142 SetPageUptodate(page);
2143 }
2144
2145 int free_io_failure(struct extent_io_tree *failure_tree,
2146 struct extent_io_tree *io_tree,
2147 struct io_failure_record *rec)
2148 {
2149 int ret;
2150 int err = 0;
2151
2152 set_state_failrec(failure_tree, rec->start, NULL);
2153 ret = clear_extent_bits(failure_tree, rec->start,
2154 rec->start + rec->len - 1,
2155 EXTENT_LOCKED | EXTENT_DIRTY);
2156 if (ret)
2157 err = ret;
2158
2159 ret = clear_extent_bits(io_tree, rec->start,
2160 rec->start + rec->len - 1,
2161 EXTENT_DAMAGED);
2162 if (ret && !err)
2163 err = ret;
2164
2165 kfree(rec);
2166 return err;
2167 }
2168
2169 /*
2170 * This bypasses the standard btrfs submit functions deliberately, as the
2171 * standard behavior is to write all copies in a raid setup. Here we only
2172 * want to write the one bad copy, so we do the mapping ourselves and
2173 * submit the bio directly.
2174 * To avoid synchronization issues, wait for the data after writing, which
2175 * actually prevents the read that triggered the error from finishing.
2176 * Currently, there can be no more than two copies of every data bit, thus
2177 * exactly one rewrite is required.
2178 */
2179 int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
2180 u64 length, u64 logical, struct page *page,
2181 unsigned int pg_offset, int mirror_num)
2182 {
2183 struct bio *bio;
2184 struct btrfs_device *dev;
2185 u64 map_length = 0;
2186 u64 sector;
2187 struct btrfs_bio *bbio = NULL;
2188 int ret;
2189
2190 ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
2191 BUG_ON(!mirror_num);
2192
2193 bio = btrfs_io_bio_alloc(1);
2194 bio->bi_iter.bi_size = 0;
2195 map_length = length;
2196
2197
2198
2199
2200
2201
2202 btrfs_bio_counter_inc_blocked(fs_info);
2203 if (btrfs_is_parity_mirror(fs_info, logical, length)) {
2204
2205
2206
2207
2208
2209
2210 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
2211 &map_length, &bbio, 0);
2212 if (ret) {
2213 btrfs_bio_counter_dec(fs_info);
2214 bio_put(bio);
2215 return -EIO;
2216 }
2217 ASSERT(bbio->mirror_num == 1);
2218 } else {
2219 ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
2220 &map_length, &bbio, mirror_num);
2221 if (ret) {
2222 btrfs_bio_counter_dec(fs_info);
2223 bio_put(bio);
2224 return -EIO;
2225 }
2226 BUG_ON(mirror_num != bbio->mirror_num);
2227 }
2228
2229 sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
2230 bio->bi_iter.bi_sector = sector;
2231 dev = bbio->stripes[bbio->mirror_num - 1].dev;
2232 btrfs_put_bbio(bbio);
2233 if (!dev || !dev->bdev ||
2234 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
2235 btrfs_bio_counter_dec(fs_info);
2236 bio_put(bio);
2237 return -EIO;
2238 }
2239 bio_set_dev(bio, dev->bdev);
2240 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2241 bio_add_page(bio, page, length, pg_offset);
2242
2243 if (btrfsic_submit_bio_wait(bio)) {
2244
2245 btrfs_bio_counter_dec(fs_info);
2246 bio_put(bio);
2247 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
2248 return -EIO;
2249 }
2250
2251 btrfs_info_rl_in_rcu(fs_info,
2252 "read error corrected: ino %llu off %llu (dev %s sector %llu)",
2253 ino, start,
2254 rcu_str_deref(dev->name), sector);
2255 btrfs_bio_counter_dec(fs_info);
2256 bio_put(bio);
2257 return 0;
2258 }
2259
2260 int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num)
2261 {
2262 struct btrfs_fs_info *fs_info = eb->fs_info;
2263 u64 start = eb->start;
2264 int i, num_pages = num_extent_pages(eb);
2265 int ret = 0;
2266
2267 if (sb_rdonly(fs_info->sb))
2268 return -EROFS;
2269
2270 for (i = 0; i < num_pages; i++) {
2271 struct page *p = eb->pages[i];
2272
2273 ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
2274 start - page_offset(p), mirror_num);
2275 if (ret)
2276 break;
2277 start += PAGE_SIZE;
2278 }
2279
2280 return ret;
2281 }
2282
2283
2284
2285
2286
2287 int clean_io_failure(struct btrfs_fs_info *fs_info,
2288 struct extent_io_tree *failure_tree,
2289 struct extent_io_tree *io_tree, u64 start,
2290 struct page *page, u64 ino, unsigned int pg_offset)
2291 {
2292 u64 private;
2293 struct io_failure_record *failrec;
2294 struct extent_state *state;
2295 int num_copies;
2296 int ret;
2297
2298 private = 0;
2299 ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
2300 EXTENT_DIRTY, 0);
2301 if (!ret)
2302 return 0;
2303
2304 ret = get_state_failrec(failure_tree, start, &failrec);
2305 if (ret)
2306 return 0;
2307
2308 BUG_ON(!failrec->this_mirror);
2309
2310 if (failrec->in_validation) {
2311
2312 btrfs_debug(fs_info,
2313 "clean_io_failure: freeing dummy error at %llu",
2314 failrec->start);
2315 goto out;
2316 }
2317 if (sb_rdonly(fs_info->sb))
2318 goto out;
2319
2320 spin_lock(&io_tree->lock);
2321 state = find_first_extent_bit_state(io_tree,
2322 failrec->start,
2323 EXTENT_LOCKED);
2324 spin_unlock(&io_tree->lock);
2325
2326 if (state && state->start <= failrec->start &&
2327 state->end >= failrec->start + failrec->len - 1) {
2328 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2329 failrec->len);
2330 if (num_copies > 1) {
2331 repair_io_failure(fs_info, ino, start, failrec->len,
2332 failrec->logical, page, pg_offset,
2333 failrec->failed_mirror);
2334 }
2335 }
2336
2337 out:
2338 free_io_failure(failure_tree, io_tree, failrec);
2339
2340 return 0;
2341 }
2342
2343
2344
2345
2346
2347
2348
2349 void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
2350 {
2351 struct extent_io_tree *failure_tree = &inode->io_failure_tree;
2352 struct io_failure_record *failrec;
2353 struct extent_state *state, *next;
2354
2355 if (RB_EMPTY_ROOT(&failure_tree->state))
2356 return;
2357
2358 spin_lock(&failure_tree->lock);
2359 state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
2360 while (state) {
2361 if (state->start > end)
2362 break;
2363
2364 ASSERT(state->end <= end);
2365
2366 next = next_state(state);
2367
2368 failrec = state->failrec;
2369 free_extent_state(state);
2370 kfree(failrec);
2371
2372 state = next;
2373 }
2374 spin_unlock(&failure_tree->lock);
2375 }
2376
2377 int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2378 struct io_failure_record **failrec_ret)
2379 {
2380 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2381 struct io_failure_record *failrec;
2382 struct extent_map *em;
2383 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2384 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2385 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2386 int ret;
2387 u64 logical;
2388
2389 ret = get_state_failrec(failure_tree, start, &failrec);
2390 if (ret) {
2391 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2392 if (!failrec)
2393 return -ENOMEM;
2394
2395 failrec->start = start;
2396 failrec->len = end - start + 1;
2397 failrec->this_mirror = 0;
2398 failrec->bio_flags = 0;
2399 failrec->in_validation = 0;
2400
2401 read_lock(&em_tree->lock);
2402 em = lookup_extent_mapping(em_tree, start, failrec->len);
2403 if (!em) {
2404 read_unlock(&em_tree->lock);
2405 kfree(failrec);
2406 return -EIO;
2407 }
2408
2409 if (em->start > start || em->start + em->len <= start) {
2410 free_extent_map(em);
2411 em = NULL;
2412 }
2413 read_unlock(&em_tree->lock);
2414 if (!em) {
2415 kfree(failrec);
2416 return -EIO;
2417 }
2418
2419 logical = start - em->start;
2420 logical = em->block_start + logical;
2421 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2422 logical = em->block_start;
2423 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2424 extent_set_compress_type(&failrec->bio_flags,
2425 em->compress_type);
2426 }
2427
2428 btrfs_debug(fs_info,
2429 "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
2430 logical, start, failrec->len);
2431
2432 failrec->logical = logical;
2433 free_extent_map(em);
2434
2435
2436 ret = set_extent_bits(failure_tree, start, end,
2437 EXTENT_LOCKED | EXTENT_DIRTY);
2438 if (ret >= 0)
2439 ret = set_state_failrec(failure_tree, start, failrec);
2440
2441 if (ret >= 0)
2442 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
2443 if (ret < 0) {
2444 kfree(failrec);
2445 return ret;
2446 }
2447 } else {
2448 btrfs_debug(fs_info,
2449 "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
2450 failrec->logical, failrec->start, failrec->len,
2451 failrec->in_validation);
2452
2453 /*
2454  * When data can be on disk more than twice, add to failrec here (e.g.
2455  * with a list) so clean_io_failure() can clean all those errors at once.
2456  */
2457 }
2458
2459 *failrec_ret = failrec;
2460
2461 return 0;
2462 }
2463
2464 bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
2465 struct io_failure_record *failrec, int failed_mirror)
2466 {
2467 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2468 int num_copies;
2469
2470 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
2471 if (num_copies == 1) {
2472 /*
2473  * We only have a single copy of the data, so don't bother with all the
2474  * retry and error correction code that follows. No matter what the
2475  * error is, it is very likely to persist.
2476  */
2477 btrfs_debug(fs_info,
2478 "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
2479 num_copies, failrec->this_mirror, failed_mirror);
2480 return false;
2481 }
2482
2483 /*
2484  * There are two premises:
2485  * a) deliver good data to the caller unless failed;
2486  * b) correct the data which is missing or invalid.
2487  */
2488 if (failed_bio_pages > 1) {
2489 /*
2490  * To fulfill b), we need to know the exact failing sectors, as we don't
2491  * want to rewrite any more than the failed ones. Thus, we need separate
2492  * read requests for the failed bio.
2493  *
2494  * If the following BUG_ON triggers, our validation request got merged;
2495  * we need separate requests for our algorithm to work.
2496  */
2497 BUG_ON(failrec->in_validation);
2498 failrec->in_validation = 1;
2499 failrec->this_mirror = failed_mirror;
2500 } else {
2501 /*
2502  * We're ready to fulfill a) and b) alongside. Get a good copy of the
2503  * failed sector and, if we succeed, we have set up everything for
2504  * repair_io_failure() to do the rest for us.
2505  */
2506 if (failrec->in_validation) {
2507 BUG_ON(failrec->this_mirror != failed_mirror);
2508 failrec->in_validation = 0;
2509 failrec->this_mirror = 0;
2510 }
2511 failrec->failed_mirror = failed_mirror;
2512 failrec->this_mirror++;
2513 if (failrec->this_mirror == failed_mirror)
2514 failrec->this_mirror++;
2515 }
2516
2517 if (failrec->this_mirror > num_copies) {
2518 btrfs_debug(fs_info,
2519 "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
2520 num_copies, failrec->this_mirror, failed_mirror);
2521 return false;
2522 }
2523
2524 return true;
2525 }
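
/*
 * Illustrative note (not from the original source): with num_copies == 2 and
 * failed_mirror == 1, a single-page failed bio takes the else branch above,
 * so this_mirror becomes 2 and the repair read is sent to the second copy.
 * If that read also fails, the next call sees failed_mirror == 2, advances
 * this_mirror to 3, which exceeds num_copies, and the function returns false,
 * ending the repair attempts.
 */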
2526
2527
2528 struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2529 struct io_failure_record *failrec,
2530 struct page *page, int pg_offset, int icsum,
2531 bio_end_io_t *endio_func, void *data)
2532 {
2533 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2534 struct bio *bio;
2535 struct btrfs_io_bio *btrfs_failed_bio;
2536 struct btrfs_io_bio *btrfs_bio;
2537
2538 bio = btrfs_io_bio_alloc(1);
2539 bio->bi_end_io = endio_func;
2540 bio->bi_iter.bi_sector = failrec->logical >> 9;
2541 bio_set_dev(bio, fs_info->fs_devices->latest_bdev);
2542 bio->bi_iter.bi_size = 0;
2543 bio->bi_private = data;
2544
2545 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2546 if (btrfs_failed_bio->csum) {
2547 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2548
2549 btrfs_bio = btrfs_io_bio(bio);
2550 btrfs_bio->csum = btrfs_bio->csum_inline;
2551 icsum *= csum_size;
2552 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
2553 csum_size);
2554 }
2555
2556 bio_add_page(bio, page, failrec->len, pg_offset);
2557
2558 return bio;
2559 }
2560
2561 /*
2562  * This is a generic handler for readpage errors. If other copies exist,
2563  * read those and write back good data to the failed position. It does
2564  * not try to remap the failed extent elsewhere, hoping the device will
2565  * be smart enough to do that as needed.
2566  */
2567 static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2568 struct page *page, u64 start, u64 end,
2569 int failed_mirror)
2570 {
2571 struct io_failure_record *failrec;
2572 struct inode *inode = page->mapping->host;
2573 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2574 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2575 struct bio *bio;
2576 int read_mode = 0;
2577 blk_status_t status;
2578 int ret;
2579 unsigned failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
2580
2581 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2582
2583 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
2584 if (ret)
2585 return ret;
2586
2587 if (!btrfs_check_repairable(inode, failed_bio_pages, failrec,
2588 failed_mirror)) {
2589 free_io_failure(failure_tree, tree, failrec);
2590 return -EIO;
2591 }
2592
2593 if (failed_bio_pages > 1)
2594 read_mode |= REQ_FAILFAST_DEV;
2595
2596 phy_offset >>= inode->i_sb->s_blocksize_bits;
2597 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2598 start - page_offset(page),
2599 (int)phy_offset, failed_bio->bi_end_io,
2600 NULL);
2601 bio->bi_opf = REQ_OP_READ | read_mode;
2602
2603 btrfs_debug(btrfs_sb(inode->i_sb),
2604 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
2605 read_mode, failrec->this_mirror, failrec->in_validation);
2606
2607 status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
2608 failrec->bio_flags);
2609 if (status) {
2610 free_io_failure(failure_tree, tree, failrec);
2611 bio_put(bio);
2612 ret = blk_status_to_errno(status);
2613 }
2614
2615 return ret;
2616 }
2617
2618
2619
2620 void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2621 {
2622 int uptodate = (err == 0);
2623 int ret = 0;
2624
2625 btrfs_writepage_endio_finish_ordered(page, start, end, uptodate);
2626
2627 if (!uptodate) {
2628 ClearPageUptodate(page);
2629 SetPageError(page);
2630 ret = err < 0 ? err : -EIO;
2631 mapping_set_error(page->mapping, ret);
2632 }
2633 }
2634
2635 /*
2636  * After a writepage IO is done, we need to:
2637  * clear the uptodate bits on error
2638  * clear the writeback bits in the extent tree for this IO
2639  * end_page_writeback if the page has no more pending IO
2640  *
2641  * Scheduling is not allowed, so the extent state tree is expected
2642  * to have one and only one object corresponding to this IO.
2643  */
2644 static void end_bio_extent_writepage(struct bio *bio)
2645 {
2646 int error = blk_status_to_errno(bio->bi_status);
2647 struct bio_vec *bvec;
2648 u64 start;
2649 u64 end;
2650 struct bvec_iter_all iter_all;
2651
2652 ASSERT(!bio_flagged(bio, BIO_CLONED));
2653 bio_for_each_segment_all(bvec, bio, iter_all) {
2654 struct page *page = bvec->bv_page;
2655 struct inode *inode = page->mapping->host;
2656 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2657
2658
2659
2660
2661
2662
2663 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2664 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2665 btrfs_err(fs_info,
2666 "partial page write in btrfs with offset %u and length %u",
2667 bvec->bv_offset, bvec->bv_len);
2668 else
2669 btrfs_info(fs_info,
2670 "incomplete page write in btrfs with offset %u and length %u",
2671 bvec->bv_offset, bvec->bv_len);
2672 }
2673
2674 start = page_offset(page);
2675 end = start + bvec->bv_offset + bvec->bv_len - 1;
2676
2677 end_extent_writepage(page, error, start, end);
2678 end_page_writeback(page);
2679 }
2680
2681 bio_put(bio);
2682 }
2683
2684 static void
2685 endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2686 int uptodate)
2687 {
2688 struct extent_state *cached = NULL;
2689 u64 end = start + len - 1;
2690
2691 if (uptodate && tree->track_uptodate)
2692 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2693 unlock_extent_cached_atomic(tree, start, end, &cached);
2694 }
2695
2696 /*
2697  * After a readpage IO is done, we need to:
2698  * clear the uptodate bits on error
2699  * set the uptodate bits if things worked
2700  * set the page up to date if all extents in the tree are uptodate
2701  * clear the lock bit in the extent tree
2702  * unlock the page if there are no other extents locked for it
2703  *
2704  * Scheduling is not allowed, so the extent state tree is expected
2705  * to have one and only one object corresponding to this IO.
2706  */
2707 static void end_bio_extent_readpage(struct bio *bio)
2708 {
2709 struct bio_vec *bvec;
2710 int uptodate = !bio->bi_status;
2711 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2712 struct extent_io_tree *tree, *failure_tree;
2713 u64 offset = 0;
2714 u64 start;
2715 u64 end;
2716 u64 len;
2717 u64 extent_start = 0;
2718 u64 extent_len = 0;
2719 int mirror;
2720 int ret;
2721 struct bvec_iter_all iter_all;
2722
2723 ASSERT(!bio_flagged(bio, BIO_CLONED));
2724 bio_for_each_segment_all(bvec, bio, iter_all) {
2725 struct page *page = bvec->bv_page;
2726 struct inode *inode = page->mapping->host;
2727 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2728 bool data_inode = btrfs_ino(BTRFS_I(inode))
2729 != BTRFS_BTREE_INODE_OBJECTID;
2730
2731 btrfs_debug(fs_info,
2732 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
2733 (u64)bio->bi_iter.bi_sector, bio->bi_status,
2734 io_bio->mirror_num);
2735 tree = &BTRFS_I(inode)->io_tree;
2736 failure_tree = &BTRFS_I(inode)->io_failure_tree;
2737
2738
2739
2740
2741
2742
2743 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2744 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2745 btrfs_err(fs_info,
2746 "partial page read in btrfs with offset %u and length %u",
2747 bvec->bv_offset, bvec->bv_len);
2748 else
2749 btrfs_info(fs_info,
2750 "incomplete page read in btrfs with offset %u and length %u",
2751 bvec->bv_offset, bvec->bv_len);
2752 }
2753
2754 start = page_offset(page);
2755 end = start + bvec->bv_offset + bvec->bv_len - 1;
2756 len = bvec->bv_len;
2757
2758 mirror = io_bio->mirror_num;
2759 if (likely(uptodate)) {
2760 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2761 page, start, end,
2762 mirror);
2763 if (ret)
2764 uptodate = 0;
2765 else
2766 clean_io_failure(BTRFS_I(inode)->root->fs_info,
2767 failure_tree, tree, start,
2768 page,
2769 btrfs_ino(BTRFS_I(inode)), 0);
2770 }
2771
2772 if (likely(uptodate))
2773 goto readpage_ok;
2774
2775 if (data_inode) {
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787 ret = bio_readpage_error(bio, offset, page, start, end,
2788 mirror);
2789 if (ret == 0) {
2790 uptodate = !bio->bi_status;
2791 offset += len;
2792 continue;
2793 }
2794 } else {
2795 struct extent_buffer *eb;
2796
2797 eb = (struct extent_buffer *)page->private;
2798 set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
2799 eb->read_mirror = mirror;
2800 atomic_dec(&eb->io_pages);
2801 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
2802 &eb->bflags))
2803 btree_readahead_hook(eb, -EIO);
2804 }
2805 readpage_ok:
2806 if (likely(uptodate)) {
2807 loff_t i_size = i_size_read(inode);
2808 pgoff_t end_index = i_size >> PAGE_SHIFT;
2809 unsigned off;
2810
2811
2812 off = offset_in_page(i_size);
2813 if (page->index == end_index && off)
2814 zero_user_segment(page, off, PAGE_SIZE);
2815 SetPageUptodate(page);
2816 } else {
2817 ClearPageUptodate(page);
2818 SetPageError(page);
2819 }
2820 unlock_page(page);
2821 offset += len;
2822
2823 if (unlikely(!uptodate)) {
2824 if (extent_len) {
2825 endio_readpage_release_extent(tree,
2826 extent_start,
2827 extent_len, 1);
2828 extent_start = 0;
2829 extent_len = 0;
2830 }
2831 endio_readpage_release_extent(tree, start,
2832 end - start + 1, 0);
2833 } else if (!extent_len) {
2834 extent_start = start;
2835 extent_len = end + 1 - start;
2836 } else if (extent_start + extent_len == start) {
2837 extent_len += end + 1 - start;
2838 } else {
2839 endio_readpage_release_extent(tree, extent_start,
2840 extent_len, uptodate);
2841 extent_start = start;
2842 extent_len = end + 1 - start;
2843 }
2844 }
2845
2846 if (extent_len)
2847 endio_readpage_release_extent(tree, extent_start, extent_len,
2848 uptodate);
2849 btrfs_io_bio_free_csum(io_bio);
2850 bio_put(bio);
2851 }
2852
2853 /*
2854  * Initialize the members up to but not including 'bio'. Use after
2855  * allocating a new bio from the bioset, as that does not initialize the
2856  * bytes outside of the embedded bio.
2857  */
2858 static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
2859 {
2860 memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
2861 }
2862
2863
2864 /*
2865  * The following helpers allocate a bio. As it's backed by a bioset, it
2866  * will never fail.
2867  */
2868 struct bio *btrfs_bio_alloc(u64 first_byte)
2869 {
2870 struct bio *bio;
2871
2872 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
2873 bio->bi_iter.bi_sector = first_byte >> 9;
2874 btrfs_io_bio_init(btrfs_io_bio(bio));
2875 return bio;
2876 }
2877
2878 struct bio *btrfs_bio_clone(struct bio *bio)
2879 {
2880 struct btrfs_io_bio *btrfs_bio;
2881 struct bio *new;
2882
2883
2884 new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
2885 btrfs_bio = btrfs_io_bio(new);
2886 btrfs_io_bio_init(btrfs_bio);
2887 btrfs_bio->iter = bio->bi_iter;
2888 return new;
2889 }
2890
2891 struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
2892 {
2893 struct bio *bio;
2894
2895
2896 bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
2897 btrfs_io_bio_init(btrfs_io_bio(bio));
2898 return bio;
2899 }
2900
2901 struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
2902 {
2903 struct bio *bio;
2904 struct btrfs_io_bio *btrfs_bio;
2905
2906
2907 bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
2908 ASSERT(bio);
2909
2910 btrfs_bio = btrfs_io_bio(bio);
2911 btrfs_io_bio_init(btrfs_bio);
2912
2913 bio_trim(bio, offset >> 9, size >> 9);
2914 btrfs_bio->iter = bio->bi_iter;
2915 return bio;
2916 }
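
/*
 * Illustrative sketch, not part of the original file: a minimal use of the
 * bio helpers above to build a READ bio for a single page at a given logical
 * byte offset. The function name and its arguments are hypothetical.
 */
static struct bio *example_build_read_bio(struct block_device *bdev,
					  struct page *page, u64 logical)
{
	struct bio *bio = btrfs_bio_alloc(logical);

	bio_set_dev(bio, bdev);
	bio->bi_opf = REQ_OP_READ;
	bio_add_page(bio, page, PAGE_SIZE, 0);
	return bio;
}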
2917
2918 /*
2919  * @opf:	bio REQ_OP_* and REQ_* flags as one value
2920  * @tree:	the extent io tree this IO belongs to
2921  * @wbc:	optional writeback control for io accounting
2922  * @page:	page to add to the bio
2923  * @offset:	logical (disk) byte offset at which the IO starts
2924  * @size:	number of bytes of @page to add
2925  * @pg_offset:	offset within @page
2926  * @bdev:	block device to attach newly created bios to
2927  * @bio_ret:	must be a valid pointer; the current bio is reused when the
2928  *		new range is contiguous and mergeable, otherwise it is
2929  *		submitted and a new bio is allocated and stored there
2930  * @end_io_func: end_io callback for a newly created bio
2931  * @mirror_num:	desired mirror to read/write
2932  * @prev_bio_flags/@bio_flags: used to decide whether bios can be merged
2933  */
2934 static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
2935 struct writeback_control *wbc,
2936 struct page *page, u64 offset,
2937 size_t size, unsigned long pg_offset,
2938 struct block_device *bdev,
2939 struct bio **bio_ret,
2940 bio_end_io_t end_io_func,
2941 int mirror_num,
2942 unsigned long prev_bio_flags,
2943 unsigned long bio_flags,
2944 bool force_bio_submit)
2945 {
2946 int ret = 0;
2947 struct bio *bio;
2948 size_t page_size = min_t(size_t, size, PAGE_SIZE);
2949 sector_t sector = offset >> 9;
2950
2951 ASSERT(bio_ret);
2952
2953 if (*bio_ret) {
2954 bool contig;
2955 bool can_merge = true;
2956
2957 bio = *bio_ret;
2958 if (prev_bio_flags & EXTENT_BIO_COMPRESSED)
2959 contig = bio->bi_iter.bi_sector == sector;
2960 else
2961 contig = bio_end_sector(bio) == sector;
2962
2963 ASSERT(tree->ops);
2964 if (btrfs_bio_fits_in_stripe(page, page_size, bio, bio_flags))
2965 can_merge = false;
2966
2967 if (prev_bio_flags != bio_flags || !contig || !can_merge ||
2968 force_bio_submit ||
2969 bio_add_page(bio, page, page_size, pg_offset) < page_size) {
2970 ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
2971 if (ret < 0) {
2972 *bio_ret = NULL;
2973 return ret;
2974 }
2975 bio = NULL;
2976 } else {
2977 if (wbc)
2978 wbc_account_cgroup_owner(wbc, page, page_size);
2979 return 0;
2980 }
2981 }
2982
2983 bio = btrfs_bio_alloc(offset);
2984 bio_set_dev(bio, bdev);
2985 bio_add_page(bio, page, page_size, pg_offset);
2986 bio->bi_end_io = end_io_func;
2987 bio->bi_private = tree;
2988 bio->bi_write_hint = page->mapping->host->i_write_hint;
2989 bio->bi_opf = opf;
2990 if (wbc) {
2991 wbc_init_bio(wbc, bio);
2992 wbc_account_cgroup_owner(wbc, page, page_size);
2993 }
2994
2995 *bio_ret = bio;
2996
2997 return ret;
2998 }
2999
3000 static void attach_extent_buffer_page(struct extent_buffer *eb,
3001 struct page *page)
3002 {
3003 if (!PagePrivate(page)) {
3004 SetPagePrivate(page);
3005 get_page(page);
3006 set_page_private(page, (unsigned long)eb);
3007 } else {
3008 WARN_ON(page->private != (unsigned long)eb);
3009 }
3010 }
3011
3012 void set_page_extent_mapped(struct page *page)
3013 {
3014 if (!PagePrivate(page)) {
3015 SetPagePrivate(page);
3016 get_page(page);
3017 set_page_private(page, EXTENT_PAGE_PRIVATE);
3018 }
3019 }
3020
3021 static struct extent_map *
3022 __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
3023 u64 start, u64 len, get_extent_t *get_extent,
3024 struct extent_map **em_cached)
3025 {
3026 struct extent_map *em;
3027
3028 if (em_cached && *em_cached) {
3029 em = *em_cached;
3030 if (extent_map_in_tree(em) && start >= em->start &&
3031 start < extent_map_end(em)) {
3032 refcount_inc(&em->refs);
3033 return em;
3034 }
3035
3036 free_extent_map(em);
3037 *em_cached = NULL;
3038 }
3039
3040 em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
3041 if (em_cached && !IS_ERR_OR_NULL(em)) {
3042 BUG_ON(*em_cached);
3043 refcount_inc(&em->refs);
3044 *em_cached = em;
3045 }
3046 return em;
3047 }
3048
3049 /*
3050  * Basic readpage implementation. Locked extent state structs are inserted
3051  * into the tree and removed when the IO is done (by the end_io handlers).
3052  *
3053  * Return 0 on success, otherwise return an error.
3054  */
3055 static int __do_readpage(struct extent_io_tree *tree,
3056 struct page *page,
3057 get_extent_t *get_extent,
3058 struct extent_map **em_cached,
3059 struct bio **bio, int mirror_num,
3060 unsigned long *bio_flags, unsigned int read_flags,
3061 u64 *prev_em_start)
3062 {
3063 struct inode *inode = page->mapping->host;
3064 u64 start = page_offset(page);
3065 const u64 end = start + PAGE_SIZE - 1;
3066 u64 cur = start;
3067 u64 extent_offset;
3068 u64 last_byte = i_size_read(inode);
3069 u64 block_start;
3070 u64 cur_end;
3071 struct extent_map *em;
3072 struct block_device *bdev;
3073 int ret = 0;
3074 int nr = 0;
3075 size_t pg_offset = 0;
3076 size_t iosize;
3077 size_t disk_io_size;
3078 size_t blocksize = inode->i_sb->s_blocksize;
3079 unsigned long this_bio_flag = 0;
3080
3081 set_page_extent_mapped(page);
3082
3083 if (!PageUptodate(page)) {
3084 if (cleancache_get_page(page) == 0) {
3085 BUG_ON(blocksize != PAGE_SIZE);
3086 unlock_extent(tree, start, end);
3087 goto out;
3088 }
3089 }
3090
3091 if (page->index == last_byte >> PAGE_SHIFT) {
3092 char *userpage;
3093 size_t zero_offset = offset_in_page(last_byte);
3094
3095 if (zero_offset) {
3096 iosize = PAGE_SIZE - zero_offset;
3097 userpage = kmap_atomic(page);
3098 memset(userpage + zero_offset, 0, iosize);
3099 flush_dcache_page(page);
3100 kunmap_atomic(userpage);
3101 }
3102 }
3103 while (cur <= end) {
3104 bool force_bio_submit = false;
3105 u64 offset;
3106
3107 if (cur >= last_byte) {
3108 char *userpage;
3109 struct extent_state *cached = NULL;
3110
3111 iosize = PAGE_SIZE - pg_offset;
3112 userpage = kmap_atomic(page);
3113 memset(userpage + pg_offset, 0, iosize);
3114 flush_dcache_page(page);
3115 kunmap_atomic(userpage);
3116 set_extent_uptodate(tree, cur, cur + iosize - 1,
3117 &cached, GFP_NOFS);
3118 unlock_extent_cached(tree, cur,
3119 cur + iosize - 1, &cached);
3120 break;
3121 }
3122 em = __get_extent_map(inode, page, pg_offset, cur,
3123 end - cur + 1, get_extent, em_cached);
3124 if (IS_ERR_OR_NULL(em)) {
3125 SetPageError(page);
3126 unlock_extent(tree, cur, end);
3127 break;
3128 }
3129 extent_offset = cur - em->start;
3130 BUG_ON(extent_map_end(em) <= cur);
3131 BUG_ON(end < cur);
3132
3133 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
3134 this_bio_flag |= EXTENT_BIO_COMPRESSED;
3135 extent_set_compress_type(&this_bio_flag,
3136 em->compress_type);
3137 }
3138
3139 iosize = min(extent_map_end(em) - cur, end - cur + 1);
3140 cur_end = min(extent_map_end(em) - 1, end);
3141 iosize = ALIGN(iosize, blocksize);
3142 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
3143 disk_io_size = em->block_len;
3144 offset = em->block_start;
3145 } else {
3146 offset = em->block_start + extent_offset;
3147 disk_io_size = iosize;
3148 }
3149 bdev = em->bdev;
3150 block_start = em->block_start;
3151 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
3152 block_start = EXTENT_MAP_HOLE;
3153
3154 /*
3155  * If we have a file range that points to a compressed extent and it is
3156  * followed by a consecutive file range that points to the same compressed
3157  * extent (possibly with a different offset and/or length), we could end up
3158  * reading wrong data if both ranges were added to the same bio: the
3159  * decompression code maps decompressed data into the bio's pages based on
3160  * a single starting file offset, so the second range would be filled with
3161  * data belonging to the first one.
3162  *
3163  * Such consecutive ranges sharing one compressed extent can be created by
3164  * cloning (reflink) operations, which produce separate extent maps that
3165  * reference the same extent item on disk.
3166  *
3167  * To avoid this, we force a new bio (force_bio_submit) whenever the current
3168  * range points to a compressed extent whose extent map start differs from
3169  * the extent map used for the previous range (tracked in *prev_em_start),
3170  * so each range gets its own bio and is decompressed independently.
3171  */
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3189 prev_em_start && *prev_em_start != (u64)-1 &&
3190 *prev_em_start != em->start)
3191 force_bio_submit = true;
3192
3193 if (prev_em_start)
3194 *prev_em_start = em->start;
3195
3196 free_extent_map(em);
3197 em = NULL;
3198
3199
3200 if (block_start == EXTENT_MAP_HOLE) {
3201 char *userpage;
3202 struct extent_state *cached = NULL;
3203
3204 userpage = kmap_atomic(page);
3205 memset(userpage + pg_offset, 0, iosize);
3206 flush_dcache_page(page);
3207 kunmap_atomic(userpage);
3208
3209 set_extent_uptodate(tree, cur, cur + iosize - 1,
3210 &cached, GFP_NOFS);
3211 unlock_extent_cached(tree, cur,
3212 cur + iosize - 1, &cached);
3213 cur = cur + iosize;
3214 pg_offset += iosize;
3215 continue;
3216 }
3217
3218 if (test_range_bit(tree, cur, cur_end,
3219 EXTENT_UPTODATE, 1, NULL)) {
3220 check_page_uptodate(tree, page);
3221 unlock_extent(tree, cur, cur + iosize - 1);
3222 cur = cur + iosize;
3223 pg_offset += iosize;
3224 continue;
3225 }
3226
3227
3228
3229 if (block_start == EXTENT_MAP_INLINE) {
3230 SetPageError(page);
3231 unlock_extent(tree, cur, cur + iosize - 1);
3232 cur = cur + iosize;
3233 pg_offset += iosize;
3234 continue;
3235 }
3236
3237 ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
3238 page, offset, disk_io_size,
3239 pg_offset, bdev, bio,
3240 end_bio_extent_readpage, mirror_num,
3241 *bio_flags,
3242 this_bio_flag,
3243 force_bio_submit);
3244 if (!ret) {
3245 nr++;
3246 *bio_flags = this_bio_flag;
3247 } else {
3248 SetPageError(page);
3249 unlock_extent(tree, cur, cur + iosize - 1);
3250 goto out;
3251 }
3252 cur = cur + iosize;
3253 pg_offset += iosize;
3254 }
3255 out:
3256 if (!nr) {
3257 if (!PageError(page))
3258 SetPageUptodate(page);
3259 unlock_page(page);
3260 }
3261 return ret;
3262 }
3263
3264 static inline void contiguous_readpages(struct extent_io_tree *tree,
3265 struct page *pages[], int nr_pages,
3266 u64 start, u64 end,
3267 struct extent_map **em_cached,
3268 struct bio **bio,
3269 unsigned long *bio_flags,
3270 u64 *prev_em_start)
3271 {
3272 struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
3273 int index;
3274
3275 btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
3276
3277 for (index = 0; index < nr_pages; index++) {
3278 __do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
3279 bio, 0, bio_flags, REQ_RAHEAD, prev_em_start);
3280 put_page(pages[index]);
3281 }
3282 }
3283
3284 static int __extent_read_full_page(struct extent_io_tree *tree,
3285 struct page *page,
3286 get_extent_t *get_extent,
3287 struct bio **bio, int mirror_num,
3288 unsigned long *bio_flags,
3289 unsigned int read_flags)
3290 {
3291 struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
3292 u64 start = page_offset(page);
3293 u64 end = start + PAGE_SIZE - 1;
3294 int ret;
3295
3296 btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
3297
3298 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3299 bio_flags, read_flags, NULL);
3300 return ret;
3301 }
3302
3303 int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
3304 get_extent_t *get_extent, int mirror_num)
3305 {
3306 struct bio *bio = NULL;
3307 unsigned long bio_flags = 0;
3308 int ret;
3309
3310 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
3311 &bio_flags, 0);
3312 if (bio)
3313 ret = submit_one_bio(bio, mirror_num, bio_flags);
3314 return ret;
3315 }
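
/*
 * Illustrative sketch, not part of the original file: how an address_space
 * readpage callback could be implemented on top of extent_read_full_page(),
 * similar to what the btrfs inode code does. The function name is
 * hypothetical.
 */
static int example_readpage(struct file *file, struct page *page)
{
	struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree;

	return extent_read_full_page(tree, page, btrfs_get_extent, 0);
}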
3316
3317 static void update_nr_written(struct writeback_control *wbc,
3318 unsigned long nr_written)
3319 {
3320 wbc->nr_to_write -= nr_written;
3321 }
3322
3323 /*
3324  * Helper for __extent_writepage(), doing all of the delayed allocation setup.
3325  * This calls btrfs_run_delalloc_range() to allocate appropriate resources
3326  * and to start the IO (or copy into an inline extent); in that case the IO
3327  * has been started and the page is already unlocked.
3328  *
3329  * Returns 1 if the IO was started and the page was unlocked,
3330  * 0 if all went well (page still locked), or
3331  * < 0 if there were errors (page still locked).
3332  */
3333 static noinline_for_stack int writepage_delalloc(struct inode *inode,
3334 struct page *page, struct writeback_control *wbc,
3335 u64 delalloc_start, unsigned long *nr_written)
3336 {
3337 u64 page_end = delalloc_start + PAGE_SIZE - 1;
3338 bool found;
3339 u64 delalloc_to_write = 0;
3340 u64 delalloc_end = 0;
3341 int ret;
3342 int page_started = 0;
3343
3344
3345 while (delalloc_end < page_end) {
3346 found = find_lock_delalloc_range(inode, page,
3347 &delalloc_start,
3348 &delalloc_end);
3349 if (!found) {
3350 delalloc_start = delalloc_end + 1;
3351 continue;
3352 }
3353 ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
3354 delalloc_end, &page_started, nr_written, wbc);
3355 if (ret) {
3356 SetPageError(page);
3357
3358
3359
3360
3361
3362
3363 ret = ret < 0 ? ret : -EIO;
3364 goto done;
3365 }
3366
3367
3368
3369
3370 delalloc_to_write += (delalloc_end - delalloc_start +
3371 PAGE_SIZE) >> PAGE_SHIFT;
3372 delalloc_start = delalloc_end + 1;
3373 }
3374 if (wbc->nr_to_write < delalloc_to_write) {
3375 int thresh = 8192;
3376
3377 if (delalloc_to_write < thresh * 2)
3378 thresh = delalloc_to_write;
3379 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3380 thresh);
3381 }
3382
3383
3384
3385
3386 if (page_started) {
3387
3388
3389
3390
3391
3392 wbc->nr_to_write -= *nr_written;
3393 return 1;
3394 }
3395
3396 ret = 0;
3397
3398 done:
3399 return ret;
3400 }
3401
3402 /*
3403  * Helper for __extent_writepage(). This calls the writepage start hooks
3404  * and does the loop to map the page into extents and bios.
3405  *
3406  * We return 1 if the IO is started and the page is unlocked,
3407  * 0 if all went well (page still locked),
3408  * < 0 if there were errors (page still locked).
3409  */
3410 static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3411 struct page *page,
3412 struct writeback_control *wbc,
3413 struct extent_page_data *epd,
3414 loff_t i_size,
3415 unsigned long nr_written,
3416 unsigned int write_flags, int *nr_ret)
3417 {
3418 struct extent_io_tree *tree = epd->tree;
3419 u64 start = page_offset(page);
3420 u64 page_end = start + PAGE_SIZE - 1;
3421 u64 end;
3422 u64 cur = start;
3423 u64 extent_offset;
3424 u64 block_start;
3425 u64 iosize;
3426 struct extent_map *em;
3427 struct block_device *bdev;
3428 size_t pg_offset = 0;
3429 size_t blocksize;
3430 int ret = 0;
3431 int nr = 0;
3432 bool compressed;
3433
3434 ret = btrfs_writepage_cow_fixup(page, start, page_end);
3435 if (ret) {
3436
3437 if (ret == -EBUSY)
3438 wbc->pages_skipped++;
3439 else
3440 redirty_page_for_writepage(wbc, page);
3441
3442 update_nr_written(wbc, nr_written);
3443 unlock_page(page);
3444 return 1;
3445 }
3446
3447
3448
3449
3450
3451 update_nr_written(wbc, nr_written + 1);
3452
3453 end = page_end;
3454 if (i_size <= start) {
3455 btrfs_writepage_endio_finish_ordered(page, start, page_end, 1);
3456 goto done;
3457 }
3458
3459 blocksize = inode->i_sb->s_blocksize;
3460
3461 while (cur <= end) {
3462 u64 em_end;
3463 u64 offset;
3464
3465 if (cur >= i_size) {
3466 btrfs_writepage_endio_finish_ordered(page, cur,
3467 page_end, 1);
3468 break;
3469 }
3470 em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur,
3471 end - cur + 1, 1);
3472 if (IS_ERR_OR_NULL(em)) {
3473 SetPageError(page);
3474 ret = PTR_ERR_OR_ZERO(em);
3475 break;
3476 }
3477
3478 extent_offset = cur - em->start;
3479 em_end = extent_map_end(em);
3480 BUG_ON(em_end <= cur);
3481 BUG_ON(end < cur);
3482 iosize = min(em_end - cur, end - cur + 1);
3483 iosize = ALIGN(iosize, blocksize);
3484 offset = em->block_start + extent_offset;
3485 bdev = em->bdev;
3486 block_start = em->block_start;
3487 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3488 free_extent_map(em);
3489 em = NULL;
3490
3491
3492
3493
3494
3495 if (compressed || block_start == EXTENT_MAP_HOLE ||
3496 block_start == EXTENT_MAP_INLINE) {
3497
3498
3499
3500
3501 if (!compressed)
3502 btrfs_writepage_endio_finish_ordered(page, cur,
3503 cur + iosize - 1,
3504 1);
3505 else if (compressed) {
3506
3507
3508
3509
3510 nr++;
3511 }
3512
3513 cur += iosize;
3514 pg_offset += iosize;
3515 continue;
3516 }
3517
3518 btrfs_set_range_writeback(tree, cur, cur + iosize - 1);
3519 if (!PageWriteback(page)) {
3520 btrfs_err(BTRFS_I(inode)->root->fs_info,
3521 "page %lu not writeback, cur %llu end %llu",
3522 page->index, cur, end);
3523 }
3524
3525 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
3526 page, offset, iosize, pg_offset,
3527 bdev, &epd->bio,
3528 end_bio_extent_writepage,
3529 0, 0, 0, false);
3530 if (ret) {
3531 SetPageError(page);
3532 if (PageWriteback(page))
3533 end_page_writeback(page);
3534 }
3535
3536 cur = cur + iosize;
3537 pg_offset += iosize;
3538 nr++;
3539 }
3540 done:
3541 *nr_ret = nr;
3542 return ret;
3543 }
3544
3545 /*
3546  * The writepage semantics are similar to regular writepage. Extent
3547  * records are inserted to lock ranges in the tree, and as dirty areas
3548  * are found, they are marked writeback. Then the lock bits are removed
3549  * and the end_io handler clears the writeback ranges.
3550  *
3551  * Return 0 if everything goes well.
3552  * Return < 0 for error.
3553  */
3554 static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3555 struct extent_page_data *epd)
3556 {
3557 struct inode *inode = page->mapping->host;
3558 u64 start = page_offset(page);
3559 u64 page_end = start + PAGE_SIZE - 1;
3560 int ret;
3561 int nr = 0;
3562 size_t pg_offset = 0;
3563 loff_t i_size = i_size_read(inode);
3564 unsigned long end_index = i_size >> PAGE_SHIFT;
3565 unsigned int write_flags = 0;
3566 unsigned long nr_written = 0;
3567
3568 write_flags = wbc_to_write_flags(wbc);
3569
3570 trace___extent_writepage(page, inode, wbc);
3571
3572 WARN_ON(!PageLocked(page));
3573
3574 ClearPageError(page);
3575
3576 pg_offset = offset_in_page(i_size);
3577 if (page->index > end_index ||
3578 (page->index == end_index && !pg_offset)) {
3579 page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
3580 unlock_page(page);
3581 return 0;
3582 }
3583
3584 if (page->index == end_index) {
3585 char *userpage;
3586
3587 userpage = kmap_atomic(page);
3588 memset(userpage + pg_offset, 0,
3589 PAGE_SIZE - pg_offset);
3590 kunmap_atomic(userpage);
3591 flush_dcache_page(page);
3592 }
3593
3594 pg_offset = 0;
3595
3596 set_page_extent_mapped(page);
3597
3598 if (!epd->extent_locked) {
3599 ret = writepage_delalloc(inode, page, wbc, start, &nr_written);
3600 if (ret == 1)
3601 goto done_unlocked;
3602 if (ret)
3603 goto done;
3604 }
3605
3606 ret = __extent_writepage_io(inode, page, wbc, epd,
3607 i_size, nr_written, write_flags, &nr);
3608 if (ret == 1)
3609 goto done_unlocked;
3610
3611 done:
3612 if (nr == 0) {
3613
3614 set_page_writeback(page);
3615 end_page_writeback(page);
3616 }
3617 if (PageError(page)) {
3618 ret = ret < 0 ? ret : -EIO;
3619 end_extent_writepage(page, ret, start, page_end);
3620 }
3621 unlock_page(page);
3622 ASSERT(ret <= 0);
3623 return ret;
3624
3625 done_unlocked:
3626 return 0;
3627 }
3628
3629 void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3630 {
3631 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3632 TASK_UNINTERRUPTIBLE);
3633 }
3634
3635 static void end_extent_buffer_writeback(struct extent_buffer *eb)
3636 {
3637 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3638 smp_mb__after_atomic();
3639 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3640 }
3641
3642 /*
3643  * Lock extent buffer pages for writeout, flushing the bio if the locks
3644  * cannot be taken without blocking.
3645  * Returns 1 if the buffer was dirty and its pages are now locked for IO,
3646  * 0 if the buffer does not need to be written, or < 0 on error (no page
3647  * is left locked in that case).
3648  */
3649 static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
3650 struct extent_page_data *epd)
3651 {
3652 struct btrfs_fs_info *fs_info = eb->fs_info;
3653 int i, num_pages, failed_page_nr;
3654 int flush = 0;
3655 int ret = 0;
3656
3657 if (!btrfs_try_tree_write_lock(eb)) {
3658 ret = flush_write_bio(epd);
3659 if (ret < 0)
3660 return ret;
3661 flush = 1;
3662 btrfs_tree_lock(eb);
3663 }
3664
3665 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3666 btrfs_tree_unlock(eb);
3667 if (!epd->sync_io)
3668 return 0;
3669 if (!flush) {
3670 ret = flush_write_bio(epd);
3671 if (ret < 0)
3672 return ret;
3673 flush = 1;
3674 }
3675 while (1) {
3676 wait_on_extent_buffer_writeback(eb);
3677 btrfs_tree_lock(eb);
3678 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3679 break;
3680 btrfs_tree_unlock(eb);
3681 }
3682 }
3683
3684
3685
3686
3687
3688
3689 spin_lock(&eb->refs_lock);
3690 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3691 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3692 spin_unlock(&eb->refs_lock);
3693 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3694 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
3695 -eb->len,
3696 fs_info->dirty_metadata_batch);
3697 ret = 1;
3698 } else {
3699 spin_unlock(&eb->refs_lock);
3700 }
3701
3702 btrfs_tree_unlock(eb);
3703
3704 if (!ret)
3705 return ret;
3706
3707 num_pages = num_extent_pages(eb);
3708 for (i = 0; i < num_pages; i++) {
3709 struct page *p = eb->pages[i];
3710
3711 if (!trylock_page(p)) {
3712 if (!flush) {
3713 int err;
3714
3715 err = flush_write_bio(epd);
3716 if (err < 0) {
3717 ret = err;
3718 failed_page_nr = i;
3719 goto err_unlock;
3720 }
3721 flush = 1;
3722 }
3723 lock_page(p);
3724 }
3725 }
3726
3727 return ret;
3728 err_unlock:
3729
3730 for (i = 0; i < failed_page_nr; i++)
3731 unlock_page(eb->pages[i]);
3732
3733
3734
3735
3736
3737 btrfs_tree_lock(eb);
3738 spin_lock(&eb->refs_lock);
3739 set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
3740 end_extent_buffer_writeback(eb);
3741 spin_unlock(&eb->refs_lock);
3742 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, eb->len,
3743 fs_info->dirty_metadata_batch);
3744 btrfs_clear_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3745 btrfs_tree_unlock(eb);
3746 return ret;
3747 }
3748
3749 static void set_btree_ioerr(struct page *page)
3750 {
3751 struct extent_buffer *eb = (struct extent_buffer *)page->private;
3752 struct btrfs_fs_info *fs_info;
3753
3754 SetPageError(page);
3755 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
3756 return;
3757
3758
3759
3760
3761
3762 fs_info = eb->fs_info;
3763 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
3764 eb->len, fs_info->dirty_metadata_batch);
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804 switch (eb->log_index) {
3805 case -1:
3806 set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags);
3807 break;
3808 case 0:
3809 set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags);
3810 break;
3811 case 1:
3812 set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags);
3813 break;
3814 default:
3815 BUG();
3816 }
3817 }
3818
3819 static void end_bio_extent_buffer_writepage(struct bio *bio)
3820 {
3821 struct bio_vec *bvec;
3822 struct extent_buffer *eb;
3823 int done;
3824 struct bvec_iter_all iter_all;
3825
3826 ASSERT(!bio_flagged(bio, BIO_CLONED));
3827 bio_for_each_segment_all(bvec, bio, iter_all) {
3828 struct page *page = bvec->bv_page;
3829
3830 eb = (struct extent_buffer *)page->private;
3831 BUG_ON(!eb);
3832 done = atomic_dec_and_test(&eb->io_pages);
3833
3834 if (bio->bi_status ||
3835 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3836 ClearPageUptodate(page);
3837 set_btree_ioerr(page);
3838 }
3839
3840 end_page_writeback(page);
3841
3842 if (!done)
3843 continue;
3844
3845 end_extent_buffer_writeback(eb);
3846 }
3847
3848 bio_put(bio);
3849 }
3850
3851 static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3852 struct writeback_control *wbc,
3853 struct extent_page_data *epd)
3854 {
3855 struct btrfs_fs_info *fs_info = eb->fs_info;
3856 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3857 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3858 u64 offset = eb->start;
3859 u32 nritems;
3860 int i, num_pages;
3861 unsigned long start, end;
3862 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
3863 int ret = 0;
3864
3865 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
3866 num_pages = num_extent_pages(eb);
3867 atomic_set(&eb->io_pages, num_pages);
3868
3869
3870 nritems = btrfs_header_nritems(eb);
3871 if (btrfs_header_level(eb) > 0) {
3872 end = btrfs_node_key_ptr_offset(nritems);
3873
3874 memzero_extent_buffer(eb, end, eb->len - end);
3875 } else {
3876
3877
3878
3879
3880 start = btrfs_item_nr_offset(nritems);
3881 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
3882 memzero_extent_buffer(eb, start, end - start);
3883 }
3884
3885 for (i = 0; i < num_pages; i++) {
3886 struct page *p = eb->pages[i];
3887
3888 clear_page_dirty_for_io(p);
3889 set_page_writeback(p);
3890 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
3891 p, offset, PAGE_SIZE, 0, bdev,
3892 &epd->bio,
3893 end_bio_extent_buffer_writepage,
3894 0, 0, 0, false);
3895 if (ret) {
3896 set_btree_ioerr(p);
3897 if (PageWriteback(p))
3898 end_page_writeback(p);
3899 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3900 end_extent_buffer_writeback(eb);
3901 ret = -EIO;
3902 break;
3903 }
3904 offset += PAGE_SIZE;
3905 update_nr_written(wbc, 1);
3906 unlock_page(p);
3907 }
3908
3909 if (unlikely(ret)) {
3910 for (; i < num_pages; i++) {
3911 struct page *p = eb->pages[i];
3912 clear_page_dirty_for_io(p);
3913 unlock_page(p);
3914 }
3915 }
3916
3917 return ret;
3918 }
3919
3920 int btree_write_cache_pages(struct address_space *mapping,
3921 struct writeback_control *wbc)
3922 {
3923 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3924 struct extent_buffer *eb, *prev_eb = NULL;
3925 struct extent_page_data epd = {
3926 .bio = NULL,
3927 .tree = tree,
3928 .extent_locked = 0,
3929 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3930 };
3931 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
3932 int ret = 0;
3933 int done = 0;
3934 int nr_to_write_done = 0;
3935 struct pagevec pvec;
3936 int nr_pages;
3937 pgoff_t index;
3938 pgoff_t end;
3939 int scanned = 0;
3940 xa_mark_t tag;
3941
3942 pagevec_init(&pvec);
3943 if (wbc->range_cyclic) {
3944 index = mapping->writeback_index;
3945 end = -1;
3946
3947
3948
3949
3950 scanned = (index == 0);
3951 } else {
3952 index = wbc->range_start >> PAGE_SHIFT;
3953 end = wbc->range_end >> PAGE_SHIFT;
3954 scanned = 1;
3955 }
3956 if (wbc->sync_mode == WB_SYNC_ALL)
3957 tag = PAGECACHE_TAG_TOWRITE;
3958 else
3959 tag = PAGECACHE_TAG_DIRTY;
3960 retry:
3961 if (wbc->sync_mode == WB_SYNC_ALL)
3962 tag_pages_for_writeback(mapping, index, end);
3963 while (!done && !nr_to_write_done && (index <= end) &&
3964 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
3965 tag))) {
3966 unsigned i;
3967
3968 for (i = 0; i < nr_pages; i++) {
3969 struct page *page = pvec.pages[i];
3970
3971 if (!PagePrivate(page))
3972 continue;
3973
3974 spin_lock(&mapping->private_lock);
3975 if (!PagePrivate(page)) {
3976 spin_unlock(&mapping->private_lock);
3977 continue;
3978 }
3979
3980 eb = (struct extent_buffer *)page->private;
3981
3982
3983
3984
3985
3986
3987 if (WARN_ON(!eb)) {
3988 spin_unlock(&mapping->private_lock);
3989 continue;
3990 }
3991
3992 if (eb == prev_eb) {
3993 spin_unlock(&mapping->private_lock);
3994 continue;
3995 }
3996
3997 ret = atomic_inc_not_zero(&eb->refs);
3998 spin_unlock(&mapping->private_lock);
3999 if (!ret)
4000 continue;
4001
4002 prev_eb = eb;
4003 ret = lock_extent_buffer_for_io(eb, &epd);
4004 if (!ret) {
4005 free_extent_buffer(eb);
4006 continue;
4007 } else if (ret < 0) {
4008 done = 1;
4009 free_extent_buffer(eb);
4010 break;
4011 }
4012
4013 ret = write_one_eb(eb, wbc, &epd);
4014 if (ret) {
4015 done = 1;
4016 free_extent_buffer(eb);
4017 break;
4018 }
4019 free_extent_buffer(eb);
4020
4021
4022
4023
4024
4025
4026 nr_to_write_done = wbc->nr_to_write <= 0;
4027 }
4028 pagevec_release(&pvec);
4029 cond_resched();
4030 }
4031 if (!scanned && !done) {
4032
4033
4034
4035
4036 scanned = 1;
4037 index = 0;
4038 goto retry;
4039 }
4040 ASSERT(ret <= 0);
4041 if (ret < 0) {
4042 end_write_bio(&epd, ret);
4043 return ret;
4044 }
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072 if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
4073 ret = flush_write_bio(&epd);
4074 } else {
4075 ret = -EUCLEAN;
4076 end_write_bio(&epd, ret);
4077 }
4078 return ret;
4079 }
4080
4081 /*
4082  * Walk the list of dirty pages of the given address space and write all
4083  * of them.
4084  *
4085  * @mapping: address space structure to write
4086  * @wbc:     subtract the number of written pages from *@wbc->nr_to_write
4087  * @epd:     holds context for the write, namely the bio being built
4088  *
4089  * If a page is already under IO, it is skipped even if it is dirty.
4090  * That is desirable behaviour for memory-cleaning writeback, but it is
4091  * incorrect for data-integrity system calls such as fsync(), which must
4092  * wait on page writeback; for WB_SYNC_ALL that is handled by flushing
4093  * and waiting in the loop below.
4094  */
4095 static int extent_write_cache_pages(struct address_space *mapping,
4096 struct writeback_control *wbc,
4097 struct extent_page_data *epd)
4098 {
4099 struct inode *inode = mapping->host;
4100 int ret = 0;
4101 int done = 0;
4102 int nr_to_write_done = 0;
4103 struct pagevec pvec;
4104 int nr_pages;
4105 pgoff_t index;
4106 pgoff_t end;
4107 pgoff_t done_index;
4108 int range_whole = 0;
4109 int scanned = 0;
4110 xa_mark_t tag;
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121 if (!igrab(inode))
4122 return 0;
4123
4124 pagevec_init(&pvec);
4125 if (wbc->range_cyclic) {
4126 index = mapping->writeback_index;
4127 end = -1;
4128
4129
4130
4131
4132 scanned = (index == 0);
4133 } else {
4134 index = wbc->range_start >> PAGE_SHIFT;
4135 end = wbc->range_end >> PAGE_SHIFT;
4136 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
4137 range_whole = 1;
4138 scanned = 1;
4139 }
4140
4141
4142
4143
4144
4145
4146
4147
4148 if (range_whole && wbc->nr_to_write == LONG_MAX &&
4149 test_and_clear_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
4150 &BTRFS_I(inode)->runtime_flags))
4151 wbc->tagged_writepages = 1;
4152
4153 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
4154 tag = PAGECACHE_TAG_TOWRITE;
4155 else
4156 tag = PAGECACHE_TAG_DIRTY;
4157 retry:
4158 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
4159 tag_pages_for_writeback(mapping, index, end);
4160 done_index = index;
4161 while (!done && !nr_to_write_done && (index <= end) &&
4162 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
4163 &index, end, tag))) {
4164 unsigned i;
4165
4166 for (i = 0; i < nr_pages; i++) {
4167 struct page *page = pvec.pages[i];
4168
4169 done_index = page->index + 1;
4170
4171
4172
4173
4174
4175
4176
4177 if (!trylock_page(page)) {
4178 ret = flush_write_bio(epd);
4179 BUG_ON(ret < 0);
4180 lock_page(page);
4181 }
4182
4183 if (unlikely(page->mapping != mapping)) {
4184 unlock_page(page);
4185 continue;
4186 }
4187
4188 if (wbc->sync_mode != WB_SYNC_NONE) {
4189 if (PageWriteback(page)) {
4190 ret = flush_write_bio(epd);
4191 BUG_ON(ret < 0);
4192 }
4193 wait_on_page_writeback(page);
4194 }
4195
4196 if (PageWriteback(page) ||
4197 !clear_page_dirty_for_io(page)) {
4198 unlock_page(page);
4199 continue;
4200 }
4201
4202 ret = __extent_writepage(page, wbc, epd);
4203 if (ret < 0) {
4204 done = 1;
4205 break;
4206 }
4207
4208
4209
4210
4211
4212
4213 nr_to_write_done = wbc->nr_to_write <= 0;
4214 }
4215 pagevec_release(&pvec);
4216 cond_resched();
4217 }
4218 if (!scanned && !done) {
4219
4220
4221
4222
4223 scanned = 1;
4224 index = 0;
4225
4226
4227
4228
4229
4230
4231
4232 ret = flush_write_bio(epd);
4233 if (!ret)
4234 goto retry;
4235 }
4236
4237 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
4238 mapping->writeback_index = done_index;
4239
4240 btrfs_add_delayed_iput(inode);
4241 return ret;
4242 }
4243
4244 int extent_write_full_page(struct page *page, struct writeback_control *wbc)
4245 {
4246 int ret;
4247 struct extent_page_data epd = {
4248 .bio = NULL,
4249 .tree = &BTRFS_I(page->mapping->host)->io_tree,
4250 .extent_locked = 0,
4251 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4252 };
4253
4254 ret = __extent_writepage(page, wbc, &epd);
4255 ASSERT(ret <= 0);
4256 if (ret < 0) {
4257 end_write_bio(&epd, ret);
4258 return ret;
4259 }
4260
4261 ret = flush_write_bio(&epd);
4262 ASSERT(ret <= 0);
4263 return ret;
4264 }
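
/*
 * Illustrative sketch, not part of the original file: an address_space
 * writepage callback can simply delegate to extent_write_full_page(), which
 * builds the extent_page_data and flushes the bio itself. The function name
 * is hypothetical.
 */
static int example_writepage(struct page *page, struct writeback_control *wbc)
{
	return extent_write_full_page(page, wbc);
}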
4265
4266 int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
4267 int mode)
4268 {
4269 int ret = 0;
4270 struct address_space *mapping = inode->i_mapping;
4271 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
4272 struct page *page;
4273 unsigned long nr_pages = (end - start + PAGE_SIZE) >>
4274 PAGE_SHIFT;
4275
4276 struct extent_page_data epd = {
4277 .bio = NULL,
4278 .tree = tree,
4279 .extent_locked = 1,
4280 .sync_io = mode == WB_SYNC_ALL,
4281 };
4282 struct writeback_control wbc_writepages = {
4283 .sync_mode = mode,
4284 .nr_to_write = nr_pages * 2,
4285 .range_start = start,
4286 .range_end = end + 1,
4287 };
4288
4289 while (start <= end) {
4290 page = find_get_page(mapping, start >> PAGE_SHIFT);
4291 if (clear_page_dirty_for_io(page))
4292 ret = __extent_writepage(page, &wbc_writepages, &epd);
4293 else {
4294 btrfs_writepage_endio_finish_ordered(page, start,
4295 start + PAGE_SIZE - 1, 1);
4296 unlock_page(page);
4297 }
4298 put_page(page);
4299 start += PAGE_SIZE;
4300 }
4301
4302 ASSERT(ret <= 0);
4303 if (ret < 0) {
4304 end_write_bio(&epd, ret);
4305 return ret;
4306 }
4307 ret = flush_write_bio(&epd);
4308 return ret;
4309 }
4310
4311 int extent_writepages(struct address_space *mapping,
4312 struct writeback_control *wbc)
4313 {
4314 int ret = 0;
4315 struct extent_page_data epd = {
4316 .bio = NULL,
4317 .tree = &BTRFS_I(mapping->host)->io_tree,
4318 .extent_locked = 0,
4319 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4320 };
4321
4322 ret = extent_write_cache_pages(mapping, wbc, &epd);
4323 ASSERT(ret <= 0);
4324 if (ret < 0) {
4325 end_write_bio(&epd, ret);
4326 return ret;
4327 }
4328 ret = flush_write_bio(&epd);
4329 return ret;
4330 }
4331
4332 int extent_readpages(struct address_space *mapping, struct list_head *pages,
4333 unsigned nr_pages)
4334 {
4335 struct bio *bio = NULL;
4336 unsigned long bio_flags = 0;
4337 struct page *pagepool[16];
4338 struct extent_map *em_cached = NULL;
4339 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
4340 int nr = 0;
4341 u64 prev_em_start = (u64)-1;
4342
4343 while (!list_empty(pages)) {
4344 u64 contig_end = 0;
4345
4346 for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
4347 struct page *page = lru_to_page(pages);
4348
4349 prefetchw(&page->flags);
4350 list_del(&page->lru);
4351 if (add_to_page_cache_lru(page, mapping, page->index,
4352 readahead_gfp_mask(mapping))) {
4353 put_page(page);
4354 break;
4355 }
4356
4357 pagepool[nr++] = page;
4358 contig_end = page_offset(page) + PAGE_SIZE - 1;
4359 }
4360
4361 if (nr) {
4362 u64 contig_start = page_offset(pagepool[0]);
4363
4364 ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
4365
4366 contiguous_readpages(tree, pagepool, nr, contig_start,
4367 contig_end, &em_cached, &bio, &bio_flags,
4368 &prev_em_start);
4369 }
4370 }
4371
4372 if (em_cached)
4373 free_extent_map(em_cached);
4374
4375 if (bio)
4376 return submit_one_bio(bio, 0, bio_flags);
4377 return 0;
4378 }
4379
4380 /*
4381  * Basic invalidatepage code; this waits on any locked or writeback ranges
4382  * corresponding to the page, and then deletes any extent state records
4383  * from the tree.
4384  */
4385 int extent_invalidatepage(struct extent_io_tree *tree,
4386 struct page *page, unsigned long offset)
4387 {
4388 struct extent_state *cached_state = NULL;
4389 u64 start = page_offset(page);
4390 u64 end = start + PAGE_SIZE - 1;
4391 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
4392
4393 start += ALIGN(offset, blocksize);
4394 if (start > end)
4395 return 0;
4396
4397 lock_extent_bits(tree, start, end, &cached_state);
4398 wait_on_page_writeback(page);
4399 clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DELALLOC |
4400 EXTENT_DO_ACCOUNTING, 1, 1, &cached_state);
4401 return 0;
4402 }
4403
4404 /*
4405  * A helper for releasepage: this tests for areas of the page that are
4406  * locked or under IO and drops the related state bits if it is safe to
4407  * drop the page.
4408  */
4409 static int try_release_extent_state(struct extent_io_tree *tree,
4410 struct page *page, gfp_t mask)
4411 {
4412 u64 start = page_offset(page);
4413 u64 end = start + PAGE_SIZE - 1;
4414 int ret = 1;
4415
4416 if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
4417 ret = 0;
4418 } else {
4419
4420
4421
4422
4423 ret = __clear_extent_bit(tree, start, end,
4424 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
4425 0, 0, NULL, mask, NULL);
4426
4427
4428
4429
4430 if (ret < 0)
4431 ret = 0;
4432 else
4433 ret = 1;
4434 }
4435 return ret;
4436 }
4437
4438 /*
4439  * A helper for releasepage. As long as there are no locked extents in the
4440  * range corresponding to the page, both state records and extent map
4441  * records are removed.
4442  */
4443 int try_release_extent_mapping(struct page *page, gfp_t mask)
4444 {
4445 struct extent_map *em;
4446 u64 start = page_offset(page);
4447 u64 end = start + PAGE_SIZE - 1;
4448 struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
4449 struct extent_io_tree *tree = &btrfs_inode->io_tree;
4450 struct extent_map_tree *map = &btrfs_inode->extent_tree;
4451
4452 if (gfpflags_allow_blocking(mask) &&
4453 page->mapping->host->i_size > SZ_16M) {
4454 u64 len;
4455 while (start <= end) {
4456 len = end - start + 1;
4457 write_lock(&map->lock);
4458 em = lookup_extent_mapping(map, start, len);
4459 if (!em) {
4460 write_unlock(&map->lock);
4461 break;
4462 }
4463 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
4464 em->start != start) {
4465 write_unlock(&map->lock);
4466 free_extent_map(em);
4467 break;
4468 }
4469 if (!test_range_bit(tree, em->start,
4470 extent_map_end(em) - 1,
4471 EXTENT_LOCKED, 0, NULL)) {
4472 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4473 &btrfs_inode->runtime_flags);
4474 remove_extent_mapping(map, em);
4475
4476 free_extent_map(em);
4477 }
4478 start = extent_map_end(em);
4479 write_unlock(&map->lock);
4480
4481
4482 free_extent_map(em);
4483 }
4484 }
4485 return try_release_extent_state(tree, page, mask);
4486 }
4487
4488 /*
4489  * Helper for fiemap, which does not want to see any holes. Keeps looking
4490  * up extent maps, skipping holes, until a non-hole extent is found or the
4491  * offset moves past 'last'.
4492  */
4492 static struct extent_map *get_extent_skip_holes(struct inode *inode,
4493 u64 offset, u64 last)
4494 {
4495 u64 sectorsize = btrfs_inode_sectorsize(inode);
4496 struct extent_map *em;
4497 u64 len;
4498
4499 if (offset >= last)
4500 return NULL;
4501
4502 while (1) {
4503 len = last - offset;
4504 if (len == 0)
4505 break;
4506 len = ALIGN(len, sectorsize);
4507 em = btrfs_get_extent_fiemap(BTRFS_I(inode), offset, len);
4508 if (IS_ERR_OR_NULL(em))
4509 return em;
4510
4511
4512 if (em->block_start != EXTENT_MAP_HOLE)
4513 return em;
4514
4515
4516 offset = extent_map_end(em);
4517 free_extent_map(em);
4518 if (offset >= last)
4519 break;
4520 }
4521 return NULL;
4522 }
4523
4524 /*
4525  * Cache the previous fiemap extent.
4526  *
4527  * Used for merging contiguous fiemap extents before emitting them.
4528  */
4529 struct fiemap_cache {
4530 u64 offset;
4531 u64 phys;
4532 u64 len;
4533 u32 flags;
4534 bool cached;
4535 };
4536
4537 /*
4538  * Helper to submit a fiemap extent.
4539  *
4540  * Will try to merge the current fiemap extent specified by @offset, @phys,
4541  * @len and @flags with the cached one. Only when the merge fails is the
4542  * cached extent submitted to user space, and the new one cached in its
4543  * place.
4544  *
4545  * Return value is the same as fiemap_fill_next_extent().
4546  */
4547 static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
4548 struct fiemap_cache *cache,
4549 u64 offset, u64 phys, u64 len, u32 flags)
4550 {
4551 int ret = 0;
4552
4553 if (!cache->cached)
4554 goto assign;
4555
4556
4557
4558
4559
4560
4561
4562
4563 if (cache->offset + cache->len > offset) {
4564 WARN_ON(1);
4565 return -EINVAL;
4566 }
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579 if (cache->offset + cache->len == offset &&
4580 cache->phys + cache->len == phys &&
4581 (cache->flags & ~FIEMAP_EXTENT_LAST) ==
4582 (flags & ~FIEMAP_EXTENT_LAST)) {
4583 cache->len += len;
4584 cache->flags |= flags;
4585 goto try_submit_last;
4586 }
4587
4588
4589 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4590 cache->len, cache->flags);
4591 cache->cached = false;
4592 if (ret)
4593 return ret;
4594 assign:
4595 cache->cached = true;
4596 cache->offset = offset;
4597 cache->phys = phys;
4598 cache->len = len;
4599 cache->flags = flags;
4600 try_submit_last:
4601 if (cache->flags & FIEMAP_EXTENT_LAST) {
4602 ret = fiemap_fill_next_extent(fieinfo, cache->offset,
4603 cache->phys, cache->len, cache->flags);
4604 cache->cached = false;
4605 }
4606 return ret;
4607 }
4608
4609 /*
4610  * Emit the last cached fiemap extent.
4611  *
4612  * The last fiemap cache may still be cached in the following case:
4613  * 0		      4k		    8k
4614  * |<- Fiemap range ->|
4615  * |<------------  First extent ----------->|
4616  *
4617  * In this case the first extent range is cached but never emitted, so it
4618  * must be flushed before extent_fiemap() returns.
4619  */
4620 static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
4621 struct fiemap_cache *cache)
4622 {
4623 int ret;
4624
4625 if (!cache->cached)
4626 return 0;
4627
4628 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4629 cache->len, cache->flags);
4630 cache->cached = false;
4631 if (ret > 0)
4632 ret = 0;
4633 return ret;
4634 }
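
/*
 * Illustrative sketch, not part of the original file: two physically and
 * logically contiguous ranges passed to emit_fiemap_extent() are merged in
 * the cache and only reach user space as a single extent once
 * emit_last_fiemap_cache() flushes it. The function name and the constant
 * offsets are hypothetical.
 */
static int example_emit_two_contiguous(struct fiemap_extent_info *fieinfo)
{
	struct fiemap_cache cache = { 0 };
	int ret;

	/* First 4K at logical offset 0, physical offset 1M: gets cached. */
	ret = emit_fiemap_extent(fieinfo, &cache, 0, SZ_1M, SZ_4K, 0);
	if (ret)
		return ret;
	/* Contiguous with the cached entry, so it is merged, not emitted. */
	ret = emit_fiemap_extent(fieinfo, &cache, SZ_4K, SZ_1M + SZ_4K, SZ_4K, 0);
	if (ret)
		return ret;
	/* Flush the single merged extent (offset 0, len 8K) to user space. */
	return emit_last_fiemap_cache(fieinfo, &cache);
}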
4635
4636 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4637 __u64 start, __u64 len)
4638 {
4639 int ret = 0;
4640 u64 off = start;
4641 u64 max = start + len;
4642 u32 flags = 0;
4643 u32 found_type;
4644 u64 last;
4645 u64 last_for_get_extent = 0;
4646 u64 disko = 0;
4647 u64 isize = i_size_read(inode);
4648 struct btrfs_key found_key;
4649 struct extent_map *em = NULL;
4650 struct extent_state *cached_state = NULL;
4651 struct btrfs_path *path;
4652 struct btrfs_root *root = BTRFS_I(inode)->root;
4653 struct fiemap_cache cache = { 0 };
4654 struct ulist *roots;
4655 struct ulist *tmp_ulist;
4656 int end = 0;
4657 u64 em_start = 0;
4658 u64 em_len = 0;
4659 u64 em_end = 0;
4660
4661 if (len == 0)
4662 return -EINVAL;
4663
4664 path = btrfs_alloc_path();
4665 if (!path)
4666 return -ENOMEM;
4667 path->leave_spinning = 1;
4668
4669 roots = ulist_alloc(GFP_KERNEL);
4670 tmp_ulist = ulist_alloc(GFP_KERNEL);
4671 if (!roots || !tmp_ulist) {
4672 ret = -ENOMEM;
4673 goto out_free_ulist;
4674 }
4675
4676 start = round_down(start, btrfs_inode_sectorsize(inode));
4677 len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
4678
4679
4680
4681
4682
4683 ret = btrfs_lookup_file_extent(NULL, root, path,
4684 btrfs_ino(BTRFS_I(inode)), -1, 0);
4685 if (ret < 0) {
4686 goto out_free_ulist;
4687 } else {
4688 WARN_ON(!ret);
4689 if (ret == 1)
4690 ret = 0;
4691 }
4692
4693 path->slots[0]--;
4694 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
4695 found_type = found_key.type;
4696
4697
4698 if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
4699 found_type != BTRFS_EXTENT_DATA_KEY) {
4700
4701 last = (u64)-1;
4702 last_for_get_extent = isize;
4703 } else {
4704
4705
4706
4707
4708
4709 last = found_key.offset;
4710 last_for_get_extent = last + 1;
4711 }
4712 btrfs_release_path(path);
4713
4714
4715
4716
4717
4718
4719 if (last < isize) {
4720 last = (u64)-1;
4721 last_for_get_extent = isize;
4722 }
4723
4724 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4725 &cached_state);
4726
4727 em = get_extent_skip_holes(inode, start, last_for_get_extent);
4728 if (!em)
4729 goto out;
4730 if (IS_ERR(em)) {
4731 ret = PTR_ERR(em);
4732 goto out;
4733 }
4734
4735 while (!end) {
4736 u64 offset_in_extent = 0;
4737
4738
4739 if (em->start >= max || extent_map_end(em) < off)
4740 break;
4741
4742
4743
4744
4745
4746
4747
4748 em_start = max(em->start, off);
4749
4750
4751
4752
4753
4754
4755
4756 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4757 offset_in_extent = em_start - em->start;
4758 em_end = extent_map_end(em);
4759 em_len = em_end - em_start;
4760 flags = 0;
4761 if (em->block_start < EXTENT_MAP_LAST_BYTE)
4762 disko = em->block_start + offset_in_extent;
4763 else
4764 disko = 0;
4765
4766
4767
4768
4769 off = extent_map_end(em);
4770 if (off >= max)
4771 end = 1;
4772
4773 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
4774 end = 1;
4775 flags |= FIEMAP_EXTENT_LAST;
4776 } else if (em->block_start == EXTENT_MAP_INLINE) {
4777 flags |= (FIEMAP_EXTENT_DATA_INLINE |
4778 FIEMAP_EXTENT_NOT_ALIGNED);
4779 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
4780 flags |= (FIEMAP_EXTENT_DELALLOC |
4781 FIEMAP_EXTENT_UNKNOWN);
4782 } else if (fieinfo->fi_extents_max) {
4783 u64 bytenr = em->block_start -
4784 (em->start - em->orig_start);
4785
4786
4787
4788
4789
4790
4791
4792
4793 ret = btrfs_check_shared(root,
4794 btrfs_ino(BTRFS_I(inode)),
4795 bytenr, roots, tmp_ulist);
4796 if (ret < 0)
4797 goto out_free;
4798 if (ret)
4799 flags |= FIEMAP_EXTENT_SHARED;
4800 ret = 0;
4801 }
4802 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4803 flags |= FIEMAP_EXTENT_ENCODED;
4804 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
4805 flags |= FIEMAP_EXTENT_UNWRITTEN;
4806
4807 free_extent_map(em);
4808 em = NULL;
4809 if ((em_start >= last) || em_len == (u64)-1 ||
4810 (last == (u64)-1 && isize <= em_end)) {
4811 flags |= FIEMAP_EXTENT_LAST;
4812 end = 1;
4813 }
4814
4815
4816 em = get_extent_skip_holes(inode, off, last_for_get_extent);
4817 if (IS_ERR(em)) {
4818 ret = PTR_ERR(em);
4819 goto out;
4820 }
4821 if (!em) {
4822 flags |= FIEMAP_EXTENT_LAST;
4823 end = 1;
4824 }
4825 ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
4826 em_len, flags);
4827 if (ret) {
4828 if (ret == 1)
4829 ret = 0;
4830 goto out_free;
4831 }
4832 }
4833 out_free:
4834 if (!ret)
4835 ret = emit_last_fiemap_cache(fieinfo, &cache);
4836 free_extent_map(em);
4837 out:
4838 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4839 &cached_state);
4840
4841 out_free_ulist:
4842 btrfs_free_path(path);
4843 ulist_free(roots);
4844 ulist_free(tmp_ulist);
4845 return ret;
4846 }
4847
4848 static void __free_extent_buffer(struct extent_buffer *eb)
4849 {
4850 btrfs_leak_debug_del(&eb->leak_list);
4851 kmem_cache_free(extent_buffer_cache, eb);
4852 }
4853
4854 int extent_buffer_under_io(struct extent_buffer *eb)
4855 {
4856 return (atomic_read(&eb->io_pages) ||
4857 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4858 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4859 }
4860
4861 /*
4862  * Release all pages attached to the extent buffer.
4863  */
4864 static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
4865 {
4866 int i;
4867 int num_pages;
4868 int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
4869
4870 BUG_ON(extent_buffer_under_io(eb));
4871
4872 num_pages = num_extent_pages(eb);
4873 for (i = 0; i < num_pages; i++) {
4874 struct page *page = eb->pages[i];
4875
4876 if (!page)
4877 continue;
4878 if (mapped)
4879 spin_lock(&page->mapping->private_lock);
4880
4881
4882
4883
4884
4885
4886
4887 if (PagePrivate(page) &&
4888 page->private == (unsigned long)eb) {
4889 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4890 BUG_ON(PageDirty(page));
4891 BUG_ON(PageWriteback(page));
4892
4893
4894
4895
4896 ClearPagePrivate(page);
4897 set_page_private(page, 0);
4898
4899 put_page(page);
4900 }
4901
4902 if (mapped)
4903 spin_unlock(&page->mapping->private_lock);
4904
4905
4906 put_page(page);
4907 }
4908 }
4909
4910 /*
4911  * Helper for releasing the extent buffer.
4912  */
4913 static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4914 {
4915 btrfs_release_extent_buffer_pages(eb);
4916 __free_extent_buffer(eb);
4917 }
4918
4919 static struct extent_buffer *
4920 __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4921 unsigned long len)
4922 {
4923 struct extent_buffer *eb = NULL;
4924
4925 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
4926 eb->start = start;
4927 eb->len = len;
4928 eb->fs_info = fs_info;
4929 eb->bflags = 0;
4930 rwlock_init(&eb->lock);
4931 atomic_set(&eb->blocking_readers, 0);
4932 eb->blocking_writers = 0;
4933 eb->lock_nested = false;
4934 init_waitqueue_head(&eb->write_lock_wq);
4935 init_waitqueue_head(&eb->read_lock_wq);
4936
4937 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4938
4939 spin_lock_init(&eb->refs_lock);
4940 atomic_set(&eb->refs, 1);
4941 atomic_set(&eb->io_pages, 0);
4942
4943
4944
4945 /* Sanity check: the largest metadata block must fit in the inline pages array */
4946 BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
4947 > MAX_INLINE_EXTENT_BUFFER_SIZE);
4948 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4949
4950 #ifdef CONFIG_BTRFS_DEBUG
4951 eb->spinning_writers = 0;
4952 atomic_set(&eb->spinning_readers, 0);
4953 atomic_set(&eb->read_locks, 0);
4954 eb->write_locks = 0;
4955 #endif
4956
4957 return eb;
4958 }
4959
4960 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4961 {
4962 int i;
4963 struct page *p;
4964 struct extent_buffer *new;
4965 int num_pages = num_extent_pages(src);
4966
4967 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
4968 if (new == NULL)
4969 return NULL;
4970
4971 for (i = 0; i < num_pages; i++) {
4972 p = alloc_page(GFP_NOFS);
4973 if (!p) {
4974 btrfs_release_extent_buffer(new);
4975 return NULL;
4976 }
4977 attach_extent_buffer_page(new, p);
4978 WARN_ON(PageDirty(p));
4979 SetPageUptodate(p);
4980 new->pages[i] = p;
4981 copy_page(page_address(p), page_address(src->pages[i]));
4982 }
4983
4984 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
4985 set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
4986
4987 return new;
4988 }
4989
4990 struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4991 u64 start, unsigned long len)
4992 {
4993 struct extent_buffer *eb;
4994 int num_pages;
4995 int i;
4996
4997 eb = __alloc_extent_buffer(fs_info, start, len);
4998 if (!eb)
4999 return NULL;
5000
5001 num_pages = num_extent_pages(eb);
5002 for (i = 0; i < num_pages; i++) {
5003 eb->pages[i] = alloc_page(GFP_NOFS);
5004 if (!eb->pages[i])
5005 goto err;
5006 }
5007 set_extent_buffer_uptodate(eb);
5008 btrfs_set_header_nritems(eb, 0);
5009 set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
5010
5011 return eb;
5012 err:
5013 for (; i > 0; i--)
5014 __free_page(eb->pages[i - 1]);
5015 __free_extent_buffer(eb);
5016 return NULL;
5017 }
5018
5019 struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
5020 u64 start)
5021 {
5022 return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
5023 }
5024
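/*
 * Illustrative usage sketch (not part of the original file): dummy extent
 * buffers are unmapped, memory-only buffers, used mainly by the self tests.
 * The zero logical address and the helper name below are made up; error
 * handling is minimal.
 */
static int example_dummy_eb(struct btrfs_fs_info *fs_info)
{
	struct extent_buffer *eb;

	eb = alloc_dummy_extent_buffer(fs_info, 0);
	if (!eb)
		return -ENOMEM;

	/* pages are already uptodate, so the buffer can be written directly */
	memzero_extent_buffer(eb, 0, eb->len);
	btrfs_set_header_nritems(eb, 0);

	free_extent_buffer(eb);
	return 0;
}
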
5025 static void check_buffer_tree_ref(struct extent_buffer *eb)
5026 {
5027 int refs;
5028
5029 /*
5030  * The TREE_REF bit is tricky.  It must be set whenever the extent
5031  * buffer is dirty or under writeback, otherwise the code that frees
5032  * buffers could end up dropping a dirty page.
5033  *
5034  * Once the bit is set it won't go away while the buffer is dirty or
5035  * in writeback, and it also won't go away while we hold our own
5036  * reference on the buffer.
5037  *
5038  * We can't just set the bit without bumping the reference count:
5039  * free_extent_buffer() could see the bit, assume it is dropping the
5040  * tree's reference and free the pages while we are still using them.
5041  *
5042  * So, under refs_lock, set the bit and take the extra reference only
5043  * if the bit was not already set; the lock serializes this against
5044  * the paths that clear TREE_REF and drop that reference.
5045  *
5046  * The fast path below skips the lock when the bit is already set.
5047  */
5048 refs = atomic_read(&eb->refs);
5049 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5050 return;
5051
5052 spin_lock(&eb->refs_lock);
5053 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5054 atomic_inc(&eb->refs);
5055 spin_unlock(&eb->refs_lock);
5056 }
5057
5058 static void mark_extent_buffer_accessed(struct extent_buffer *eb,
5059 struct page *accessed)
5060 {
5061 int num_pages, i;
5062
5063 check_buffer_tree_ref(eb);
5064
5065 num_pages = num_extent_pages(eb);
5066 for (i = 0; i < num_pages; i++) {
5067 struct page *p = eb->pages[i];
5068
5069 if (p != accessed)
5070 mark_page_accessed(p);
5071 }
5072 }
5073
5074 struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
5075 u64 start)
5076 {
5077 struct extent_buffer *eb;
5078
5079 rcu_read_lock();
5080 eb = radix_tree_lookup(&fs_info->buffer_radix,
5081 start >> PAGE_SHIFT);
5082 if (eb && atomic_inc_not_zero(&eb->refs)) {
5083 rcu_read_unlock();
5084
5085 /*
5086  * Lock and unlock the eb's refs_lock to avoid racing with
5087  * free_extent_buffer().  When we got our eb it might already be
5088  * flagged with EXTENT_BUFFER_STALE while another task running
5089  * free_extent_buffer() saw that flag, saw eb->refs == 2, saw the
5090  * buffer not under IO (dirty and writeback flags clear) and still
5091  * in the radix tree (EXTENT_BUFFER_IN_TREE set), and is therefore
5092  * in the middle of dropping the buffer's references.  Without this
5093  * barrier we could bump our reference, clear the stale flag and
5094  * mark the buffer dirty while the other task is still dropping
5095  * references, which could later lead to an attempt to free a dirty
5096  * buffer.  Briefly taking refs_lock makes sure any such task has
5097  * finished its critical section before we proceed.
5098  */
5099 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
5100 spin_lock(&eb->refs_lock);
5101 spin_unlock(&eb->refs_lock);
5102 }
5103 mark_extent_buffer_accessed(eb, NULL);
5104 return eb;
5105 }
5106 rcu_read_unlock();
5107
5108 return NULL;
5109 }
5110
5111 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
5112 struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
5113 u64 start)
5114 {
5115 struct extent_buffer *eb, *exists = NULL;
5116 int ret;
5117
5118 eb = find_extent_buffer(fs_info, start);
5119 if (eb)
5120 return eb;
5121 eb = alloc_dummy_extent_buffer(fs_info, start);
5122 if (!eb)
5123 return ERR_PTR(-ENOMEM);
5124 eb->fs_info = fs_info;
5125 again:
5126 ret = radix_tree_preload(GFP_NOFS);
5127 if (ret) {
5128 exists = ERR_PTR(ret);
5129 goto free_eb;
5130 }
5131 spin_lock(&fs_info->buffer_lock);
5132 ret = radix_tree_insert(&fs_info->buffer_radix,
5133 start >> PAGE_SHIFT, eb);
5134 spin_unlock(&fs_info->buffer_lock);
5135 radix_tree_preload_end();
5136 if (ret == -EEXIST) {
5137 exists = find_extent_buffer(fs_info, start);
5138 if (exists)
5139 goto free_eb;
5140 else
5141 goto again;
5142 }
5143 check_buffer_tree_ref(eb);
5144 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
5145
5146 return eb;
5147 free_eb:
5148 btrfs_release_extent_buffer(eb);
5149 return exists;
5150 }
5151 #endif
5152
5153 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
5154 u64 start)
5155 {
5156 unsigned long len = fs_info->nodesize;
5157 int num_pages;
5158 int i;
5159 unsigned long index = start >> PAGE_SHIFT;
5160 struct extent_buffer *eb;
5161 struct extent_buffer *exists = NULL;
5162 struct page *p;
5163 struct address_space *mapping = fs_info->btree_inode->i_mapping;
5164 int uptodate = 1;
5165 int ret;
5166
5167 if (!IS_ALIGNED(start, fs_info->sectorsize)) {
5168 btrfs_err(fs_info, "bad tree block start %llu", start);
5169 return ERR_PTR(-EINVAL);
5170 }
5171
5172 eb = find_extent_buffer(fs_info, start);
5173 if (eb)
5174 return eb;
5175
5176 eb = __alloc_extent_buffer(fs_info, start, len);
5177 if (!eb)
5178 return ERR_PTR(-ENOMEM);
5179
5180 num_pages = num_extent_pages(eb);
5181 for (i = 0; i < num_pages; i++, index++) {
5182 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
5183 if (!p) {
5184 exists = ERR_PTR(-ENOMEM);
5185 goto free_eb;
5186 }
5187
5188 spin_lock(&mapping->private_lock);
5189 if (PagePrivate(p)) {
5190 /*
5191  * We could have already allocated an eb for this page and attached one,
5192  * so see if we can grab a reference on the existing eb.  If we can, we
5193  * know it is good and we can just return it; otherwise the old owner is
5194  * on its way out and it is safe to overwrite page->private with our new
5195  * eb below.
5196  */
5197 exists = (struct extent_buffer *)p->private;
5198 if (atomic_inc_not_zero(&exists->refs)) {
5199 spin_unlock(&mapping->private_lock);
5200 unlock_page(p);
5201 put_page(p);
5202 mark_extent_buffer_accessed(exists, p);
5203 goto free_eb;
5204 }
5205 exists = NULL;
5206
5207 /*
5208  * The old eb is dying: detach it so that attaching ours below does not
5209  * complain, and drop the page reference the old eb held.
5210  */
5211 ClearPagePrivate(p);
5212 WARN_ON(PageDirty(p));
5213 put_page(p);
5214 }
5215 attach_extent_buffer_page(eb, p);
5216 spin_unlock(&mapping->private_lock);
5217 WARN_ON(PageDirty(p));
5218 eb->pages[i] = p;
5219 if (!PageUptodate(p))
5220 uptodate = 0;
5221
5222 /*
5223  * We can't unlock the pages just yet since the extent buffer hasn't
5224  * been properly inserted into the radix tree: that opens a race with
5225  * btree_releasepage(), which could free a page while we are still
5226  * filling in the remaining pages for the buffer, and we could then
5227  * crash.
5228  */
5229 }
5230 if (uptodate)
5231 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5232 again:
5233 ret = radix_tree_preload(GFP_NOFS);
5234 if (ret) {
5235 exists = ERR_PTR(ret);
5236 goto free_eb;
5237 }
5238
5239 spin_lock(&fs_info->buffer_lock);
5240 ret = radix_tree_insert(&fs_info->buffer_radix,
5241 start >> PAGE_SHIFT, eb);
5242 spin_unlock(&fs_info->buffer_lock);
5243 radix_tree_preload_end();
5244 if (ret == -EEXIST) {
5245 exists = find_extent_buffer(fs_info, start);
5246 if (exists)
5247 goto free_eb;
5248 else
5249 goto again;
5250 }
5251
5252 check_buffer_tree_ref(eb);
5253 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
5254
5255 /*
5256  * Now it's safe to unlock the pages: any call to btree_releasepage()
5257  * will correctly detect that a page belongs to a live extent buffer
5258  * and won't free it prematurely.
5259  */
5260 for (i = 0; i < num_pages; i++)
5261 unlock_page(eb->pages[i]);
5262 return eb;
5263
5264 free_eb:
5265 WARN_ON(!atomic_dec_and_test(&eb->refs));
5266 for (i = 0; i < num_pages; i++) {
5267 if (eb->pages[i])
5268 unlock_page(eb->pages[i]);
5269 }
5270
5271 btrfs_release_extent_buffer(eb);
5272 return exists;
5273 }
5274
5275 static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
5276 {
5277 struct extent_buffer *eb =
5278 container_of(head, struct extent_buffer, rcu_head);
5279
5280 __free_extent_buffer(eb);
5281 }
5282
5283 static int release_extent_buffer(struct extent_buffer *eb)
5284 {
5285 lockdep_assert_held(&eb->refs_lock);
5286
5287 WARN_ON(atomic_read(&eb->refs) == 0);
5288 if (atomic_dec_and_test(&eb->refs)) {
5289 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
5290 struct btrfs_fs_info *fs_info = eb->fs_info;
5291
5292 spin_unlock(&eb->refs_lock);
5293
5294 spin_lock(&fs_info->buffer_lock);
5295 radix_tree_delete(&fs_info->buffer_radix,
5296 eb->start >> PAGE_SHIFT);
5297 spin_unlock(&fs_info->buffer_lock);
5298 } else {
5299 spin_unlock(&eb->refs_lock);
5300 }
5301
5302 /* Should be safe to release our pages at this point */
5303 btrfs_release_extent_buffer_pages(eb);
5304 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
5305 if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
5306 __free_extent_buffer(eb);
5307 return 1;
5308 }
5309 #endif
5310 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
5311 return 1;
5312 }
5313 spin_unlock(&eb->refs_lock);
5314
5315 return 0;
5316 }
5317
5318 void free_extent_buffer(struct extent_buffer *eb)
5319 {
5320 int refs;
5321 int old;
5322 if (!eb)
5323 return;
5324
5325 while (1) {
5326 refs = atomic_read(&eb->refs);
5327 if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
5328 || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
5329 refs == 1))
5330 break;
5331 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
5332 if (old == refs)
5333 return;
5334 }
5335
5336 spin_lock(&eb->refs_lock);
5337 if (atomic_read(&eb->refs) == 2 &&
5338 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
5339 !extent_buffer_under_io(eb) &&
5340 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5341 atomic_dec(&eb->refs);
5342
5343 /*
5344  * Drop our reference: release_extent_buffer() is called with refs_lock
5345  * held, drops the lock and frees the buffer if that was the last ref.
5346  */
5347 release_extent_buffer(eb);
5348 }
5349
5350 void free_extent_buffer_stale(struct extent_buffer *eb)
5351 {
5352 if (!eb)
5353 return;
5354
5355 spin_lock(&eb->refs_lock);
5356 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
5357
5358 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
5359 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5360 atomic_dec(&eb->refs);
5361 release_extent_buffer(eb);
5362 }
5363
5364 void clear_extent_buffer_dirty(struct extent_buffer *eb)
5365 {
5366 int i;
5367 int num_pages;
5368 struct page *page;
5369
5370 num_pages = num_extent_pages(eb);
5371
5372 for (i = 0; i < num_pages; i++) {
5373 page = eb->pages[i];
5374 if (!PageDirty(page))
5375 continue;
5376
5377 lock_page(page);
5378 WARN_ON(!PagePrivate(page));
5379
5380 clear_page_dirty_for_io(page);
5381 xa_lock_irq(&page->mapping->i_pages);
5382 if (!PageDirty(page))
5383 __xa_clear_mark(&page->mapping->i_pages,
5384 page_index(page), PAGECACHE_TAG_DIRTY);
5385 xa_unlock_irq(&page->mapping->i_pages);
5386 ClearPageError(page);
5387 unlock_page(page);
5388 }
5389 WARN_ON(atomic_read(&eb->refs) == 0);
5390 }
5391
5392 bool set_extent_buffer_dirty(struct extent_buffer *eb)
5393 {
5394 int i;
5395 int num_pages;
5396 bool was_dirty;
5397
5398 check_buffer_tree_ref(eb);
5399
5400 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
5401
5402 num_pages = num_extent_pages(eb);
5403 WARN_ON(atomic_read(&eb->refs) == 0);
5404 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
5405
5406 if (!was_dirty)
5407 for (i = 0; i < num_pages; i++)
5408 set_page_dirty(eb->pages[i]);
5409
5410 #ifdef CONFIG_BTRFS_DEBUG
5411 for (i = 0; i < num_pages; i++)
5412 ASSERT(PageDirty(eb->pages[i]));
5413 #endif
5414
5415 return was_dirty;
5416 }
5417
5418 void clear_extent_buffer_uptodate(struct extent_buffer *eb)
5419 {
5420 int i;
5421 struct page *page;
5422 int num_pages;
5423
5424 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5425 num_pages = num_extent_pages(eb);
5426 for (i = 0; i < num_pages; i++) {
5427 page = eb->pages[i];
5428 if (page)
5429 ClearPageUptodate(page);
5430 }
5431 }
5432
5433 void set_extent_buffer_uptodate(struct extent_buffer *eb)
5434 {
5435 int i;
5436 struct page *page;
5437 int num_pages;
5438
5439 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5440 num_pages = num_extent_pages(eb);
5441 for (i = 0; i < num_pages; i++) {
5442 page = eb->pages[i];
5443 SetPageUptodate(page);
5444 }
5445 }
5446
5447 int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
5448 {
5449 int i;
5450 struct page *page;
5451 int err;
5452 int ret = 0;
5453 int locked_pages = 0;
5454 int all_uptodate = 1;
5455 int num_pages;
5456 unsigned long num_reads = 0;
5457 struct bio *bio = NULL;
5458 unsigned long bio_flags = 0;
5459 struct extent_io_tree *tree = &BTRFS_I(eb->fs_info->btree_inode)->io_tree;
5460
5461 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
5462 return 0;
5463
5464 num_pages = num_extent_pages(eb);
5465 for (i = 0; i < num_pages; i++) {
5466 page = eb->pages[i];
5467 if (wait == WAIT_NONE) {
5468 if (!trylock_page(page))
5469 goto unlock_exit;
5470 } else {
5471 lock_page(page);
5472 }
5473 locked_pages++;
5474 }
5475 /*
5476  * Lock all pages first so that the uptodate bits we check below can't
5477  * be cleared concurrently by clear_extent_buffer_uptodate() while we
5478  * look at them.
5479  */
5480 for (i = 0; i < num_pages; i++) {
5481 page = eb->pages[i];
5482 if (!PageUptodate(page)) {
5483 num_reads++;
5484 all_uptodate = 0;
5485 }
5486 }
5487
5488 if (all_uptodate) {
5489 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5490 goto unlock_exit;
5491 }
5492
5493 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
5494 eb->read_mirror = 0;
5495 atomic_set(&eb->io_pages, num_reads);
5496 for (i = 0; i < num_pages; i++) {
5497 page = eb->pages[i];
5498
5499 if (!PageUptodate(page)) {
5500 if (ret) {
5501 atomic_dec(&eb->io_pages);
5502 unlock_page(page);
5503 continue;
5504 }
5505
5506 ClearPageError(page);
5507 err = __extent_read_full_page(tree, page,
5508 btree_get_extent, &bio,
5509 mirror_num, &bio_flags,
5510 REQ_META);
5511 if (err) {
5512 ret = err;
5513
5514 /*
5515  * We passed &bio to __extent_read_full_page() above, so if it
5516  * returned an error the current page failed to be added to the
5517  * bio and has already been unlocked.  It will never complete
5518  * through the endio handler, so we have to drop its io_pages
5519  * count ourselves.
5520  */
5521 atomic_dec(&eb->io_pages);
5522 }
5523 } else {
5524 unlock_page(page);
5525 }
5526 }
5527
5528 if (bio) {
5529 err = submit_one_bio(bio, mirror_num, bio_flags);
5530 if (err)
5531 return err;
5532 }
5533
5534 if (ret || wait != WAIT_COMPLETE)
5535 return ret;
5536
5537 for (i = 0; i < num_pages; i++) {
5538 page = eb->pages[i];
5539 wait_on_page_locked(page);
5540 if (!PageUptodate(page))
5541 ret = -EIO;
5542 }
5543
5544 return ret;
5545
5546 unlock_exit:
5547 while (locked_pages > 0) {
5548 locked_pages--;
5549 page = eb->pages[locked_pages];
5550 unlock_page(page);
5551 }
5552 return ret;
5553 }
5554
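/*
 * Illustrative usage sketch (not part of the original file): a typical way to
 * get a readable tree block is to look up or create its extent buffer and then
 * read any pages that are not yet uptodate.  The function name and 'logical'
 * address are hypothetical; mirror 0 means "let the lower layers pick a copy",
 * and the extra verification the real callers do is omitted.
 */
static struct extent_buffer *example_read_tree_block(struct btrfs_fs_info *fs_info,
						     u64 logical)
{
	struct extent_buffer *eb;
	int ret;

	eb = alloc_extent_buffer(fs_info, logical);
	if (IS_ERR(eb))
		return eb;

	/* submit reads for the non-uptodate pages and wait for them */
	ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, 0);
	if (ret) {
		free_extent_buffer(eb);
		return ERR_PTR(ret);
	}
	return eb;
}
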
5555 void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
5556 unsigned long start, unsigned long len)
5557 {
5558 size_t cur;
5559 size_t offset;
5560 struct page *page;
5561 char *kaddr;
5562 char *dst = (char *)dstv;
5563 size_t start_offset = offset_in_page(eb->start);
5564 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5565
5566 if (start + len > eb->len) {
5567 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
5568 eb->start, eb->len, start, len);
5569 memset(dst, 0, len);
5570 return;
5571 }
5572
5573 offset = offset_in_page(start_offset + start);
5574
5575 while (len > 0) {
5576 page = eb->pages[i];
5577
5578 cur = min(len, (PAGE_SIZE - offset));
5579 kaddr = page_address(page);
5580 memcpy(dst, kaddr + offset, cur);
5581
5582 dst += cur;
5583 len -= cur;
5584 offset = 0;
5585 i++;
5586 }
5587 }
5588
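/*
 * Illustrative sketch (not part of the original file): reading an on-disk
 * header field out of an extent buffer.  Offsets are relative to the start of
 * the buffer, so offsetof() on the on-disk structure is enough, and the helper
 * copies across page boundaries transparently.  The function name is made up.
 */
static void example_read_fsid(const struct extent_buffer *eb,
			      u8 fsid[BTRFS_FSID_SIZE])
{
	/* copy BTRFS_FSID_SIZE bytes starting at the header's fsid field */
	read_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
			   BTRFS_FSID_SIZE);
}
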
5589 int read_extent_buffer_to_user(const struct extent_buffer *eb,
5590 void __user *dstv,
5591 unsigned long start, unsigned long len)
5592 {
5593 size_t cur;
5594 size_t offset;
5595 struct page *page;
5596 char *kaddr;
5597 char __user *dst = (char __user *)dstv;
5598 size_t start_offset = offset_in_page(eb->start);
5599 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5600 int ret = 0;
5601
5602 WARN_ON(start > eb->len);
5603 WARN_ON(start + len > eb->start + eb->len);
5604
5605 offset = offset_in_page(start_offset + start);
5606
5607 while (len > 0) {
5608 page = eb->pages[i];
5609
5610 cur = min(len, (PAGE_SIZE - offset));
5611 kaddr = page_address(page);
5612 if (copy_to_user(dst, kaddr + offset, cur)) {
5613 ret = -EFAULT;
5614 break;
5615 }
5616
5617 dst += cur;
5618 len -= cur;
5619 offset = 0;
5620 i++;
5621 }
5622
5623 return ret;
5624 }
5625
5626 /*
5627  * Return 0 if the requested range lies within a single page (*map,
5628  * *map_start and *map_len are filled in), 1 if it spans two pages,
5629  * or -EINVAL if it falls outside the extent buffer.
5630  */
5631 int map_private_extent_buffer(const struct extent_buffer *eb,
5632 unsigned long start, unsigned long min_len,
5633 char **map, unsigned long *map_start,
5634 unsigned long *map_len)
5635 {
5636 size_t offset;
5637 char *kaddr;
5638 struct page *p;
5639 size_t start_offset = offset_in_page(eb->start);
5640 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5641 unsigned long end_i = (start_offset + start + min_len - 1) >>
5642 PAGE_SHIFT;
5643
5644 if (start + min_len > eb->len) {
5645 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
5646 eb->start, eb->len, start, min_len);
5647 return -EINVAL;
5648 }
5649
5650 if (i != end_i)
5651 return 1;
5652
5653 if (i == 0) {
5654 offset = start_offset;
5655 *map_start = 0;
5656 } else {
5657 offset = 0;
5658 *map_start = ((u64)i << PAGE_SHIFT) - start_offset;
5659 }
5660
5661 p = eb->pages[i];
5662 kaddr = page_address(p);
5663 *map = kaddr + offset;
5664 *map_len = PAGE_SIZE - offset;
5665 return 0;
5666 }
5667
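/*
 * Illustrative sketch (not part of the original file): callers of
 * map_private_extent_buffer() must handle the value straddling a page
 * boundary (return value 1) by falling back to the copying helper, which is
 * roughly what the btrfs setget macros do.  The function name is made up and
 * get_unaligned_le64() is assumed to be available (asm/unaligned.h).
 */
static u64 example_read_u64(const struct extent_buffer *eb, unsigned long off)
{
	unsigned long map_start;
	unsigned long map_len;
	char *kaddr;
	__le64 leval;

	if (map_private_extent_buffer(eb, off, sizeof(leval), &kaddr,
				      &map_start, &map_len) == 0)
		/* the whole value lives in one page, read it in place */
		return get_unaligned_le64(kaddr + off - map_start);

	/* value crosses a page boundary (or the range was bad): copy it out */
	read_extent_buffer(eb, &leval, off, sizeof(leval));
	return le64_to_cpu(leval);
}
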
5668 int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
5669 unsigned long start, unsigned long len)
5670 {
5671 size_t cur;
5672 size_t offset;
5673 struct page *page;
5674 char *kaddr;
5675 char *ptr = (char *)ptrv;
5676 size_t start_offset = offset_in_page(eb->start);
5677 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5678 int ret = 0;
5679
5680 WARN_ON(start > eb->len);
5681 WARN_ON(start + len > eb->start + eb->len);
5682
5683 offset = offset_in_page(start_offset + start);
5684
5685 while (len > 0) {
5686 page = eb->pages[i];
5687
5688 cur = min(len, (PAGE_SIZE - offset));
5689
5690 kaddr = page_address(page);
5691 ret = memcmp(ptr, kaddr + offset, cur);
5692 if (ret)
5693 break;
5694
5695 ptr += cur;
5696 len -= cur;
5697 offset = 0;
5698 i++;
5699 }
5700 return ret;
5701 }
5702
5703 void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
5704 const void *srcv)
5705 {
5706 char *kaddr;
5707
5708 WARN_ON(!PageUptodate(eb->pages[0]));
5709 kaddr = page_address(eb->pages[0]);
5710 memcpy(kaddr + offsetof(struct btrfs_header, chunk_tree_uuid), srcv,
5711 BTRFS_FSID_SIZE);
5712 }
5713
5714 void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv)
5715 {
5716 char *kaddr;
5717
5718 WARN_ON(!PageUptodate(eb->pages[0]));
5719 kaddr = page_address(eb->pages[0]);
5720 memcpy(kaddr + offsetof(struct btrfs_header, fsid), srcv,
5721 BTRFS_FSID_SIZE);
5722 }
5723
5724 void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
5725 unsigned long start, unsigned long len)
5726 {
5727 size_t cur;
5728 size_t offset;
5729 struct page *page;
5730 char *kaddr;
5731 char *src = (char *)srcv;
5732 size_t start_offset = offset_in_page(eb->start);
5733 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5734
5735 WARN_ON(start > eb->len);
5736 WARN_ON(start + len > eb->start + eb->len);
5737
5738 offset = offset_in_page(start_offset + start);
5739
5740 while (len > 0) {
5741 page = eb->pages[i];
5742 WARN_ON(!PageUptodate(page));
5743
5744 cur = min(len, PAGE_SIZE - offset);
5745 kaddr = page_address(page);
5746 memcpy(kaddr + offset, src, cur);
5747
5748 src += cur;
5749 len -= cur;
5750 offset = 0;
5751 i++;
5752 }
5753 }
5754
5755 void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
5756 unsigned long len)
5757 {
5758 size_t cur;
5759 size_t offset;
5760 struct page *page;
5761 char *kaddr;
5762 size_t start_offset = offset_in_page(eb->start);
5763 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5764
5765 WARN_ON(start > eb->len);
5766 WARN_ON(start + len > eb->start + eb->len);
5767
5768 offset = offset_in_page(start_offset + start);
5769
5770 while (len > 0) {
5771 page = eb->pages[i];
5772 WARN_ON(!PageUptodate(page));
5773
5774 cur = min(len, PAGE_SIZE - offset);
5775 kaddr = page_address(page);
5776 memset(kaddr + offset, 0, cur);
5777
5778 len -= cur;
5779 offset = 0;
5780 i++;
5781 }
5782 }
5783
5784 void copy_extent_buffer_full(struct extent_buffer *dst,
5785 struct extent_buffer *src)
5786 {
5787 int i;
5788 int num_pages;
5789
5790 ASSERT(dst->len == src->len);
5791
5792 num_pages = num_extent_pages(dst);
5793 for (i = 0; i < num_pages; i++)
5794 copy_page(page_address(dst->pages[i]),
5795 page_address(src->pages[i]));
5796 }
5797
5798 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
5799 unsigned long dst_offset, unsigned long src_offset,
5800 unsigned long len)
5801 {
5802 u64 dst_len = dst->len;
5803 size_t cur;
5804 size_t offset;
5805 struct page *page;
5806 char *kaddr;
5807 size_t start_offset = offset_in_page(dst->start);
5808 unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
5809
5810 WARN_ON(src->len != dst_len);
5811
5812 offset = offset_in_page(start_offset + dst_offset);
5813
5814 while (len > 0) {
5815 page = dst->pages[i];
5816 WARN_ON(!PageUptodate(page));
5817
5818 cur = min(len, (unsigned long)(PAGE_SIZE - offset));
5819
5820 kaddr = page_address(page);
5821 read_extent_buffer(src, kaddr + offset, src_offset, cur);
5822
5823 src_offset += cur;
5824 len -= cur;
5825 offset = 0;
5826 i++;
5827 }
5828 }
5829
5830
5831 /*
5832  * eb_bitmap_offset() - calculate the page and offset of the byte
5833  * containing a given bit of a bitmap item
5834  * @eb: the extent buffer
5835  * @start: byte offset of the bitmap item in the extent buffer
5836  * @nr: bit number
5837  * @page_index: return index of the page that contains the given bit
5838  * @page_offset: return byte offset into that page
5839  *
5840  * This helper hides the ugliness of finding the byte in an extent
5841  * buffer which contains a given bit.
5842  */
5843 static inline void eb_bitmap_offset(struct extent_buffer *eb,
5844 unsigned long start, unsigned long nr,
5845 unsigned long *page_index,
5846 size_t *page_offset)
5847 {
5848 size_t start_offset = offset_in_page(eb->start);
5849 size_t byte_offset = BIT_BYTE(nr);
5850 size_t offset;
5851
5852 /*
5853  * The byte we want is at: the offset of the eb within its first page,
5854  * plus the offset of the bitmap item within the eb, plus the offset of
5855  * the byte within the bitmap item.
5856  */
5857 offset = start_offset + start + byte_offset;
5858
5859 *page_index = offset >> PAGE_SHIFT;
5860 *page_offset = offset_in_page(offset);
5861 }
5862
5863 /*
5864  * extent_buffer_test_bit() - determine whether a bit in a bitmap item is set
5865  * @eb: the extent buffer
5866  * @start: byte offset of the bitmap item in the extent buffer
5867  * @nr: bit number to test
5868  */
5869 int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
5870 unsigned long nr)
5871 {
5872 u8 *kaddr;
5873 struct page *page;
5874 unsigned long i;
5875 size_t offset;
5876
5877 eb_bitmap_offset(eb, start, nr, &i, &offset);
5878 page = eb->pages[i];
5879 WARN_ON(!PageUptodate(page));
5880 kaddr = page_address(page);
5881 return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
5882 }
5883
5884 /*
5885  * extent_buffer_bitmap_set() - set an area of a bitmap
5886  * @eb: the extent buffer
5887  * @start: byte offset of the bitmap item in the extent buffer
5888  * @pos: bit number of the first bit to set
5889  * @len: number of bits to set
5890  */
5891 void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
5892 unsigned long pos, unsigned long len)
5893 {
5894 u8 *kaddr;
5895 struct page *page;
5896 unsigned long i;
5897 size_t offset;
5898 const unsigned int size = pos + len;
5899 int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5900 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
5901
5902 eb_bitmap_offset(eb, start, pos, &i, &offset);
5903 page = eb->pages[i];
5904 WARN_ON(!PageUptodate(page));
5905 kaddr = page_address(page);
5906
5907 while (len >= bits_to_set) {
5908 kaddr[offset] |= mask_to_set;
5909 len -= bits_to_set;
5910 bits_to_set = BITS_PER_BYTE;
5911 mask_to_set = ~0;
5912 if (++offset >= PAGE_SIZE && len > 0) {
5913 offset = 0;
5914 page = eb->pages[++i];
5915 WARN_ON(!PageUptodate(page));
5916 kaddr = page_address(page);
5917 }
5918 }
5919 if (len) {
5920 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
5921 kaddr[offset] |= mask_to_set;
5922 }
5923 }
5924
5925
5926 /*
5927  * extent_buffer_bitmap_clear() - clear an area of a bitmap
5928  * @eb: the extent buffer
5929  * @start: byte offset of the bitmap item in the extent buffer
5930  * @pos: bit number of the first bit to clear
5931  * @len: number of bits to clear
5932  */
5933 void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
5934 unsigned long pos, unsigned long len)
5935 {
5936 u8 *kaddr;
5937 struct page *page;
5938 unsigned long i;
5939 size_t offset;
5940 const unsigned int size = pos + len;
5941 int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5942 u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
5943
5944 eb_bitmap_offset(eb, start, pos, &i, &offset);
5945 page = eb->pages[i];
5946 WARN_ON(!PageUptodate(page));
5947 kaddr = page_address(page);
5948
5949 while (len >= bits_to_clear) {
5950 kaddr[offset] &= ~mask_to_clear;
5951 len -= bits_to_clear;
5952 bits_to_clear = BITS_PER_BYTE;
5953 mask_to_clear = ~0;
5954 if (++offset >= PAGE_SIZE && len > 0) {
5955 offset = 0;
5956 page = eb->pages[++i];
5957 WARN_ON(!PageUptodate(page));
5958 kaddr = page_address(page);
5959 }
5960 }
5961 if (len) {
5962 mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
5963 kaddr[offset] &= ~mask_to_clear;
5964 }
5965 }
5966
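/*
 * Illustrative sketch (not part of the original file): the bitmap helpers
 * above operate on a bitmap item stored inside an extent buffer; 'start' is
 * the byte offset of that item in the buffer and bits are counted from its
 * first bit.  The function name, offsets and lengths below are made up.
 */
static void example_bitmap_usage(struct extent_buffer *leaf,
				 unsigned long bitmap_start)
{
	/* set bits 8..23 of the bitmap item */
	extent_buffer_bitmap_set(leaf, bitmap_start, 8, 16);

	/* bit 10 is inside the range we just set */
	if (extent_buffer_test_bit(leaf, bitmap_start, 10))
		extent_buffer_bitmap_clear(leaf, bitmap_start, 8, 16);
}
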
5967 static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
5968 {
5969 unsigned long distance = (src > dst) ? src - dst : dst - src;
5970 return distance < len;
5971 }
5972
5973 static void copy_pages(struct page *dst_page, struct page *src_page,
5974 unsigned long dst_off, unsigned long src_off,
5975 unsigned long len)
5976 {
5977 char *dst_kaddr = page_address(dst_page);
5978 char *src_kaddr;
5979 int must_memmove = 0;
5980
5981 if (dst_page != src_page) {
5982 src_kaddr = page_address(src_page);
5983 } else {
5984 src_kaddr = dst_kaddr;
5985 if (areas_overlap(src_off, dst_off, len))
5986 must_memmove = 1;
5987 }
5988
5989 if (must_memmove)
5990 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
5991 else
5992 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
5993 }
5994
5995 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5996 unsigned long src_offset, unsigned long len)
5997 {
5998 struct btrfs_fs_info *fs_info = dst->fs_info;
5999 size_t cur;
6000 size_t dst_off_in_page;
6001 size_t src_off_in_page;
6002 size_t start_offset = offset_in_page(dst->start);
6003 unsigned long dst_i;
6004 unsigned long src_i;
6005
6006 if (src_offset + len > dst->len) {
6007 btrfs_err(fs_info,
6008 "memmove bogus src_offset %lu move len %lu dst len %lu",
6009 src_offset, len, dst->len);
6010 BUG();
6011 }
6012 if (dst_offset + len > dst->len) {
6013 btrfs_err(fs_info,
6014 "memmove bogus dst_offset %lu move len %lu dst len %lu",
6015 dst_offset, len, dst->len);
6016 BUG();
6017 }
6018
6019 while (len > 0) {
6020 dst_off_in_page = offset_in_page(start_offset + dst_offset);
6021 src_off_in_page = offset_in_page(start_offset + src_offset);
6022
6023 dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
6024 src_i = (start_offset + src_offset) >> PAGE_SHIFT;
6025
6026 cur = min(len, (unsigned long)(PAGE_SIZE -
6027 src_off_in_page));
6028 cur = min_t(unsigned long, cur,
6029 (unsigned long)(PAGE_SIZE - dst_off_in_page));
6030
6031 copy_pages(dst->pages[dst_i], dst->pages[src_i],
6032 dst_off_in_page, src_off_in_page, cur);
6033
6034 src_offset += cur;
6035 dst_offset += cur;
6036 len -= cur;
6037 }
6038 }
6039
6040 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
6041 unsigned long src_offset, unsigned long len)
6042 {
6043 struct btrfs_fs_info *fs_info = dst->fs_info;
6044 size_t cur;
6045 size_t dst_off_in_page;
6046 size_t src_off_in_page;
6047 unsigned long dst_end = dst_offset + len - 1;
6048 unsigned long src_end = src_offset + len - 1;
6049 size_t start_offset = offset_in_page(dst->start);
6050 unsigned long dst_i;
6051 unsigned long src_i;
6052
6053 if (src_offset + len > dst->len) {
6054 btrfs_err(fs_info,
6055 "memmove bogus src_offset %lu move len %lu len %lu",
6056 src_offset, len, dst->len);
6057 BUG();
6058 }
6059 if (dst_offset + len > dst->len) {
6060 btrfs_err(fs_info,
6061 "memmove bogus dst_offset %lu move len %lu len %lu",
6062 dst_offset, len, dst->len);
6063 BUG();
6064 }
6065 if (dst_offset < src_offset) {
6066 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
6067 return;
6068 }
6069 while (len > 0) {
6070 dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
6071 src_i = (start_offset + src_end) >> PAGE_SHIFT;
6072
6073 dst_off_in_page = offset_in_page(start_offset + dst_end);
6074 src_off_in_page = offset_in_page(start_offset + src_end);
6075
6076 cur = min_t(unsigned long, len, src_off_in_page + 1);
6077 cur = min(cur, dst_off_in_page + 1);
6078 copy_pages(dst->pages[dst_i], dst->pages[src_i],
6079 dst_off_in_page - cur + 1,
6080 src_off_in_page - cur + 1, cur);
6081
6082 dst_end -= cur;
6083 src_end -= cur;
6084 len -= cur;
6085 }
6086 }
6087
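/*
 * Illustrative sketch (not part of the original file): memmove_extent_buffer()
 * is the helper item insertion and deletion would use to shift existing data
 * inside a leaf; overlapping ranges are copied back to front when necessary.
 * The function name and offsets below are made up.
 */
static void example_shift_data(struct extent_buffer *leaf)
{
	/* move 128 bytes that start at offset 512 up to offset 640 */
	memmove_extent_buffer(leaf, 640, 512, 128);
}
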
6088 int try_release_extent_buffer(struct page *page)
6089 {
6090 struct extent_buffer *eb;
6091
6092 /*
6093  * We need to make sure nobody is attaching this page to an extent
6094  * buffer right now.
6095  */
6096 spin_lock(&page->mapping->private_lock);
6097 if (!PagePrivate(page)) {
6098 spin_unlock(&page->mapping->private_lock);
6099 return 1;
6100 }
6101
6102 eb = (struct extent_buffer *)page->private;
6103 BUG_ON(!eb);
6104
6105 /*
6106  * If anybody besides the tree itself still holds a reference to this
6107  * eb, or if it is dirty or under writeback, it cannot be released
6108  * from here.
6109  */
6110 spin_lock(&eb->refs_lock);
6111 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
6112 spin_unlock(&eb->refs_lock);
6113 spin_unlock(&page->mapping->private_lock);
6114 return 0;
6115 }
6116 spin_unlock(&page->mapping->private_lock);
6117
6118 /*
6119  * If TREE_REF was not set, the remaining reference is a real user
6120  * reference, so just bail out; the page will be freed soon anyway.
6121  */
6122 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
6123 spin_unlock(&eb->refs_lock);
6124 return 0;
6125 }
6126
6127 return release_extent_buffer(eb);
6128 }