This source file includes the following definitions:
- scrub_is_page_on_raid56
- scrub_pending_bio_inc
- scrub_pending_bio_dec
- __scrub_blocked_if_needed
- scrub_pause_on
- scrub_pause_off
- scrub_blocked_if_needed
- insert_full_stripe_lock
- search_full_stripe_lock
- get_full_stripe_logical
- lock_full_stripe
- unlock_full_stripe
- scrub_free_csums
- scrub_free_ctx
- scrub_put_ctx
- scrub_setup_ctx
- scrub_print_warning_inode
- scrub_print_warning
- scrub_get_recover
- scrub_put_recover
- scrub_handle_errored_block
- scrub_nr_raid_mirrors
- scrub_stripe_index_and_offset
- scrub_setup_recheck_block
- scrub_bio_wait_endio
- scrub_submit_raid56_bio_wait
- scrub_recheck_block_on_raid56
- scrub_recheck_block
- scrub_check_fsid
- scrub_recheck_block_checksum
- scrub_repair_block_from_good_copy
- scrub_repair_page_from_good_copy
- scrub_write_block_to_dev_replace
- scrub_write_page_to_dev_replace
- scrub_add_page_to_wr_bio
- scrub_wr_submit
- scrub_wr_bio_end_io
- scrub_wr_bio_end_io_worker
- scrub_checksum
- scrub_checksum_data
- scrub_checksum_tree_block
- scrub_checksum_super
- scrub_block_get
- scrub_block_put
- scrub_page_get
- scrub_page_put
- scrub_submit
- scrub_add_page_to_rd_bio
- scrub_missing_raid56_end_io
- scrub_missing_raid56_worker
- scrub_missing_raid56_pages
- scrub_pages
- scrub_bio_end_io
- scrub_bio_end_io_worker
- __scrub_mark_bitmap
- scrub_parity_mark_sectors_error
- scrub_parity_mark_sectors_data
- scrub_block_complete
- scrub_find_csum
- scrub_extent
- scrub_pages_for_parity
- scrub_extent_for_parity
- get_raid56_logic_offset
- scrub_free_parity
- scrub_parity_bio_endio_worker
- scrub_parity_bio_endio
- scrub_parity_check_and_repair
- scrub_calc_parity_bitmap_len
- scrub_parity_get
- scrub_parity_put
- scrub_raid56_parity
- scrub_stripe
- scrub_chunk
- scrub_enumerate_chunks
- scrub_supers
- scrub_workers_get
- btrfs_scrub_dev
- btrfs_scrub_pause
- btrfs_scrub_continue
- btrfs_scrub_cancel
- btrfs_scrub_cancel_dev
- btrfs_scrub_progress
- scrub_remap_extent
1
2
3
4
5
6 #include <linux/blkdev.h>
7 #include <linux/ratelimit.h>
8 #include <linux/sched/mm.h>
9 #include <crypto/hash.h>
10 #include "ctree.h"
11 #include "volumes.h"
12 #include "disk-io.h"
13 #include "ordered-data.h"
14 #include "transaction.h"
15 #include "backref.h"
16 #include "extent_io.h"
17 #include "dev-replace.h"
18 #include "check-integrity.h"
19 #include "rcu-string.h"
20 #include "raid56.h"
21 #include "block-group.h"
22
23 /*
24  * Scrub walks all allocated extents and all super blocks of the
25  * filesystem, reads the data and metadata back from disk and verifies
26  * checksums and tree block headers.  When a bad copy is detected and a
27  * good mirror exists, the bad copy is rewritten from the good one.
28  *
29  * The same machinery is reused by device replace (is_dev_replace): each
30  * block that is read (and repaired if necessary) is also written to the
31  * replacement target device.
32  *
33  * RAID5/6 full stripes are locked while they are scrubbed and their
34  * parity is verified and repaired through the raid56 code.
35  */
36 struct scrub_block;
37 struct scrub_ctx;
38 /*
39  * The following three values only influence the performance.
40  *
41  * The last one configures the number of parallel and outstanding I/O
42  * operations.  The first two values configure an upper limit for the
43  * number of (dynamically allocated) pages that are added to a bio.
44  */
45 #define SCRUB_PAGES_PER_RD_BIO 32
46 #define SCRUB_PAGES_PER_WR_BIO 32
47 #define SCRUB_BIOS_PER_SCTX 64
48
49 /*
50  * SCRUB_MAX_PAGES_PER_BLOCK * PAGE_SIZE must be large enough to hold the
51  * largest node/leaf/sector size that is supported; a scrub_block never
52  * holds more pages than this (see the BUG_ON()s in scrub_pages()).
53  */
54 #define SCRUB_MAX_PAGES_PER_BLOCK 16
55
56 struct scrub_recover {
57 refcount_t refs;
58 struct btrfs_bio *bbio;
59 u64 map_length;
60 };
61
62 struct scrub_page {
63 struct scrub_block *sblock;
64 struct page *page;
65 struct btrfs_device *dev;
66 struct list_head list;
67 u64 flags;
68 u64 generation;
69 u64 logical;
70 u64 physical;
71 u64 physical_for_dev_replace;
72 atomic_t refs;
73 struct {
74 unsigned int mirror_num:8;
75 unsigned int have_csum:1;
76 unsigned int io_error:1;
77 };
78 u8 csum[BTRFS_CSUM_SIZE];
79
80 struct scrub_recover *recover;
81 };
82
83 struct scrub_bio {
84 int index;
85 struct scrub_ctx *sctx;
86 struct btrfs_device *dev;
87 struct bio *bio;
88 blk_status_t status;
89 u64 logical;
90 u64 physical;
91 #if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
92 struct scrub_page *pagev[SCRUB_PAGES_PER_WR_BIO];
93 #else
94 struct scrub_page *pagev[SCRUB_PAGES_PER_RD_BIO];
95 #endif
96 int page_count;
97 int next_free;
98 struct btrfs_work work;
99 };
100
101 struct scrub_block {
102 struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
103 int page_count;
104 atomic_t outstanding_pages;
105 refcount_t refs;
106 struct scrub_ctx *sctx;
107 struct scrub_parity *sparity;
108 struct {
109 unsigned int header_error:1;
110 unsigned int checksum_error:1;
111 unsigned int no_io_error_seen:1;
112 unsigned int generation_error:1;
113
114
115
116 unsigned int data_corrected:1;
117 };
118 struct btrfs_work work;
119 };
120
121
122 struct scrub_parity {
123 struct scrub_ctx *sctx;
124
125 struct btrfs_device *scrub_dev;
126
127 u64 logic_start;
128
129 u64 logic_end;
130
131 int nsectors;
132
133 u64 stripe_len;
134
135 refcount_t refs;
136
137 struct list_head spages;
138
139
140 struct btrfs_work work;
141
142 /* Marks the sectors in the full stripe that contain data to be checked */
143 unsigned long *dbitmap;
144
145 /*
146  * Marks the sectors where an error happened when reading the data or
147  * verifying its checksum
148  */
149 unsigned long *ebitmap;
150
151 unsigned long bitmap[0];
152 };
153
154 struct scrub_ctx {
155 struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX];
156 struct btrfs_fs_info *fs_info;
157 int first_free;
158 int curr;
159 atomic_t bios_in_flight;
160 atomic_t workers_pending;
161 spinlock_t list_lock;
162 wait_queue_head_t list_wait;
163 u16 csum_size;
164 struct list_head csum_list;
165 atomic_t cancel_req;
166 int readonly;
167 int pages_per_rd_bio;
168
169 int is_dev_replace;
170
171 struct scrub_bio *wr_curr_bio;
172 struct mutex wr_lock;
173 int pages_per_wr_bio;
174 struct btrfs_device *wr_tgtdev;
175 bool flush_all_writes;
176
177 /*
178  * Statistics reported to user space through btrfs_scrub_progress
179  */
180 struct btrfs_scrub_progress stat;
181 spinlock_t stat_lock;
182
183 /*
184  * The reference count protects the context against use-after-free:
185  * in-flight bios and workers each hold a reference (see
186  * scrub_pending_bio_inc()), so the context is only freed once the last
187  * reference is put (scrub_put_ctx() -> scrub_free_ctx()).
188  */
189
190 refcount_t refs;
191 };
192
193 struct scrub_warning {
194 struct btrfs_path *path;
195 u64 extent_item_size;
196 const char *errstr;
197 u64 physical;
198 u64 logical;
199 struct btrfs_device *dev;
200 };
201
202 struct full_stripe_lock {
203 struct rb_node node;
204 u64 logical;
205 u64 refs;
206 struct mutex mutex;
207 };
208
209 static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
210 static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
211 static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
212 static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
213 struct scrub_block *sblocks_for_recheck);
214 static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
215 struct scrub_block *sblock,
216 int retry_failed_mirror);
217 static void scrub_recheck_block_checksum(struct scrub_block *sblock);
218 static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
219 struct scrub_block *sblock_good);
220 static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
221 struct scrub_block *sblock_good,
222 int page_num, int force_write);
223 static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
224 static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
225 int page_num);
226 static int scrub_checksum_data(struct scrub_block *sblock);
227 static int scrub_checksum_tree_block(struct scrub_block *sblock);
228 static int scrub_checksum_super(struct scrub_block *sblock);
229 static void scrub_block_get(struct scrub_block *sblock);
230 static void scrub_block_put(struct scrub_block *sblock);
231 static void scrub_page_get(struct scrub_page *spage);
232 static void scrub_page_put(struct scrub_page *spage);
233 static void scrub_parity_get(struct scrub_parity *sparity);
234 static void scrub_parity_put(struct scrub_parity *sparity);
235 static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
236 struct scrub_page *spage);
237 static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
238 u64 physical, struct btrfs_device *dev, u64 flags,
239 u64 gen, int mirror_num, u8 *csum, int force,
240 u64 physical_for_dev_replace);
241 static void scrub_bio_end_io(struct bio *bio);
242 static void scrub_bio_end_io_worker(struct btrfs_work *work);
243 static void scrub_block_complete(struct scrub_block *sblock);
244 static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
245 u64 extent_logical, u64 extent_len,
246 u64 *extent_physical,
247 struct btrfs_device **extent_dev,
248 int *extent_mirror_num);
249 static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
250 struct scrub_page *spage);
251 static void scrub_wr_submit(struct scrub_ctx *sctx);
252 static void scrub_wr_bio_end_io(struct bio *bio);
253 static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
254 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
255 static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
256 static void scrub_put_ctx(struct scrub_ctx *sctx);
257
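/* True if the page sits on a RAID5/6 chunk, i.e. its recovery context maps to a BTRFS_BLOCK_GROUP_RAID56_MASK block group. */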
258 static inline int scrub_is_page_on_raid56(struct scrub_page *page)
259 {
260 return page->recover &&
261 (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
262 }
263
264 static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
265 {
266 refcount_inc(&sctx->refs);
267 atomic_inc(&sctx->bios_in_flight);
268 }
269
270 static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
271 {
272 atomic_dec(&sctx->bios_in_flight);
273 wake_up(&sctx->list_wait);
274 scrub_put_ctx(sctx);
275 }
276
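/*
 * Wait until a pending scrub pause request has been released.  The
 * scrub_lock is dropped while waiting so the pause requester can make
 * progress; the caller must hold fs_info->scrub_lock.
 */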
277 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
278 {
279 while (atomic_read(&fs_info->scrub_pause_req)) {
280 mutex_unlock(&fs_info->scrub_lock);
281 wait_event(fs_info->scrub_pause_wait,
282 atomic_read(&fs_info->scrub_pause_req) == 0);
283 mutex_lock(&fs_info->scrub_lock);
284 }
285 }
286
287 static void scrub_pause_on(struct btrfs_fs_info *fs_info)
288 {
289 atomic_inc(&fs_info->scrubs_paused);
290 wake_up(&fs_info->scrub_pause_wait);
291 }
292
293 static void scrub_pause_off(struct btrfs_fs_info *fs_info)
294 {
295 mutex_lock(&fs_info->scrub_lock);
296 __scrub_blocked_if_needed(fs_info);
297 atomic_dec(&fs_info->scrubs_paused);
298 mutex_unlock(&fs_info->scrub_lock);
299
300 wake_up(&fs_info->scrub_pause_wait);
301 }
302
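/*
 * Convenience wrapper: mark this scrub as paused, honour a pending pause
 * request if there is one, then continue.
 */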
303 static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
304 {
305 scrub_pause_on(fs_info);
306 scrub_pause_off(fs_info);
307 }
308
309 /*
310  * Insert a new full stripe lock into the full stripe locks tree.
311  *
312  * Returns a pointer to the existing lock (with its refcount increased) if
313  * one already covers @fstripe_logical, the newly inserted lock otherwise,
314  * or ERR_PTR(-ENOMEM) if the allocation failed.
315  *
316  * NOTE: the caller must hold locks_root->lock when calling this
317  * function.
318  */
319 static struct full_stripe_lock *insert_full_stripe_lock(
320 struct btrfs_full_stripe_locks_tree *locks_root,
321 u64 fstripe_logical)
322 {
323 struct rb_node **p;
324 struct rb_node *parent = NULL;
325 struct full_stripe_lock *entry;
326 struct full_stripe_lock *ret;
327
328 lockdep_assert_held(&locks_root->lock);
329
330 p = &locks_root->root.rb_node;
331 while (*p) {
332 parent = *p;
333 entry = rb_entry(parent, struct full_stripe_lock, node);
334 if (fstripe_logical < entry->logical) {
335 p = &(*p)->rb_left;
336 } else if (fstripe_logical > entry->logical) {
337 p = &(*p)->rb_right;
338 } else {
339 entry->refs++;
340 return entry;
341 }
342 }
343
344
345
346
347 ret = kmalloc(sizeof(*ret), GFP_KERNEL);
348 if (!ret)
349 return ERR_PTR(-ENOMEM);
350 ret->logical = fstripe_logical;
351 ret->refs = 1;
352 mutex_init(&ret->mutex);
353
354 rb_link_node(&ret->node, parent, p);
355 rb_insert_color(&ret->node, &locks_root->root);
356 return ret;
357 }
358
359 /*
360  * Search for a full stripe lock of a block group.
361  *
362  * Returns the lock covering @fstripe_logical, or NULL if none exists.
363  * The caller must hold locks_root->lock.
364  */
365 static struct full_stripe_lock *search_full_stripe_lock(
366 struct btrfs_full_stripe_locks_tree *locks_root,
367 u64 fstripe_logical)
368 {
369 struct rb_node *node;
370 struct full_stripe_lock *entry;
371
372 lockdep_assert_held(&locks_root->lock);
373
374 node = locks_root->root.rb_node;
375 while (node) {
376 entry = rb_entry(node, struct full_stripe_lock, node);
377 if (fstripe_logical < entry->logical)
378 node = node->rb_left;
379 else if (fstripe_logical > entry->logical)
380 node = node->rb_right;
381 else
382 return entry;
383 }
384 return NULL;
385 }
386
387 /*
388  * Helper to get the logical start of the full stripe that covers @bytenr.
389  *
390  * The caller must ensure @cache is a RAID56 block group.
391  */
392 static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
393 u64 bytenr)
394 {
395 u64 ret;
396
397 /*
398  * full_stripe_len is expected to fit into 32 bits; warn once if it ever
399  * does not
400  */
401 WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);
402
403 /*
404  * round_down() only handles power-of-2 alignments, while a RAID56 full
405  * stripe length can be 64KiB * n, so do the rounding with div64_u64()
406  */
407 ret = div64_u64(bytenr - cache->key.objectid, cache->full_stripe_len) *
408 cache->full_stripe_len + cache->key.objectid;
409 return ret;
410 }
411
412 /*
413  * Lock a full stripe to avoid concurrent recovery and read.
414  *
415  * This is only used for profiles with parity (RAID5/6); for all other
416  * profiles it does nothing.
417  *
418  * Returns 0 on success; when the block group is RAID5/6, the full stripe
419  * covering @bytenr is locked, *locked_ret is set to true and the caller
420  * must call unlock_full_stripe() when done.
421  * Returns <0 on error.
422  */
423 static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
424 bool *locked_ret)
425 {
426 struct btrfs_block_group_cache *bg_cache;
427 struct btrfs_full_stripe_locks_tree *locks_root;
428 struct full_stripe_lock *existing;
429 u64 fstripe_start;
430 int ret = 0;
431
432 *locked_ret = false;
433 bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
434 if (!bg_cache) {
435 ASSERT(0);
436 return -ENOENT;
437 }
438
439
440 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
441 goto out;
442 locks_root = &bg_cache->full_stripe_locks_root;
443
444 fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
445
446
447 mutex_lock(&locks_root->lock);
448 existing = insert_full_stripe_lock(locks_root, fstripe_start);
449 mutex_unlock(&locks_root->lock);
450 if (IS_ERR(existing)) {
451 ret = PTR_ERR(existing);
452 goto out;
453 }
454 mutex_lock(&existing->mutex);
455 *locked_ret = true;
456 out:
457 btrfs_put_block_group(bg_cache);
458 return ret;
459 }
460
461 /*
462  * Unlock a full stripe.
463  *
464  * NOTE: @locked must be the value returned through lock_full_stripe()'s
465  * *locked_ret; if it is false this function is a no-op.
466  *
467  * Returns 0 on success and <0 on error.
468  */
469
470 static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
471 bool locked)
472 {
473 struct btrfs_block_group_cache *bg_cache;
474 struct btrfs_full_stripe_locks_tree *locks_root;
475 struct full_stripe_lock *fstripe_lock;
476 u64 fstripe_start;
477 bool freeit = false;
478 int ret = 0;
479
480
481 if (!locked)
482 return 0;
483
484 bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
485 if (!bg_cache) {
486 ASSERT(0);
487 return -ENOENT;
488 }
489 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
490 goto out;
491
492 locks_root = &bg_cache->full_stripe_locks_root;
493 fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
494
495 mutex_lock(&locks_root->lock);
496 fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
497
498 if (!fstripe_lock) {
499 WARN_ON(1);
500 ret = -ENOENT;
501 mutex_unlock(&locks_root->lock);
502 goto out;
503 }
504
505 if (fstripe_lock->refs == 0) {
506 WARN_ON(1);
507 btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
508 fstripe_lock->logical);
509 } else {
510 fstripe_lock->refs--;
511 }
512
513 if (fstripe_lock->refs == 0) {
514 rb_erase(&fstripe_lock->node, &locks_root->root);
515 freeit = true;
516 }
517 mutex_unlock(&locks_root->lock);
518
519 mutex_unlock(&fstripe_lock->mutex);
520 if (freeit)
521 kfree(fstripe_lock);
522 out:
523 btrfs_put_block_group(bg_cache);
524 return ret;
525 }
526
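/* Free all checksums still queued on sctx->csum_list. */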
527 static void scrub_free_csums(struct scrub_ctx *sctx)
528 {
529 while (!list_empty(&sctx->csum_list)) {
530 struct btrfs_ordered_sum *sum;
531 sum = list_first_entry(&sctx->csum_list,
532 struct btrfs_ordered_sum, list);
533 list_del(&sum->list);
534 kfree(sum);
535 }
536 }
537
538 static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
539 {
540 int i;
541
542 if (!sctx)
543 return;
544
545
546 if (sctx->curr != -1) {
547 struct scrub_bio *sbio = sctx->bios[sctx->curr];
548
549 for (i = 0; i < sbio->page_count; i++) {
550 WARN_ON(!sbio->pagev[i]->page);
551 scrub_block_put(sbio->pagev[i]->sblock);
552 }
553 bio_put(sbio->bio);
554 }
555
556 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
557 struct scrub_bio *sbio = sctx->bios[i];
558
559 if (!sbio)
560 break;
561 kfree(sbio);
562 }
563
564 kfree(sctx->wr_curr_bio);
565 scrub_free_csums(sctx);
566 kfree(sctx);
567 }
568
569 static void scrub_put_ctx(struct scrub_ctx *sctx)
570 {
571 if (refcount_dec_and_test(&sctx->refs))
572 scrub_free_ctx(sctx);
573 }
574
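/*
 * Allocate and initialize a scrub context: the pool of SCRUB_BIOS_PER_SCTX
 * read bios, the csum list, the wait queue and, for device replace, the
 * write path to the target device.
 */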
575 static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
576 struct btrfs_fs_info *fs_info, int is_dev_replace)
577 {
578 struct scrub_ctx *sctx;
579 int i;
580
581 sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
582 if (!sctx)
583 goto nomem;
584 refcount_set(&sctx->refs, 1);
585 sctx->is_dev_replace = is_dev_replace;
586 sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
587 sctx->curr = -1;
588 sctx->fs_info = fs_info;
589 INIT_LIST_HEAD(&sctx->csum_list);
590 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
591 struct scrub_bio *sbio;
592
593 sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
594 if (!sbio)
595 goto nomem;
596 sctx->bios[i] = sbio;
597
598 sbio->index = i;
599 sbio->sctx = sctx;
600 sbio->page_count = 0;
601 btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, NULL,
602 NULL);
603
604 if (i != SCRUB_BIOS_PER_SCTX - 1)
605 sctx->bios[i]->next_free = i + 1;
606 else
607 sctx->bios[i]->next_free = -1;
608 }
609 sctx->first_free = 0;
610 atomic_set(&sctx->bios_in_flight, 0);
611 atomic_set(&sctx->workers_pending, 0);
612 atomic_set(&sctx->cancel_req, 0);
613 sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
614
615 spin_lock_init(&sctx->list_lock);
616 spin_lock_init(&sctx->stat_lock);
617 init_waitqueue_head(&sctx->list_wait);
618
619 WARN_ON(sctx->wr_curr_bio != NULL);
620 mutex_init(&sctx->wr_lock);
621 sctx->wr_curr_bio = NULL;
622 if (is_dev_replace) {
623 WARN_ON(!fs_info->dev_replace.tgtdev);
624 sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
625 sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
626 sctx->flush_all_writes = false;
627 }
628
629 return sctx;
630
631 nomem:
632 scrub_free_ctx(sctx);
633 return ERR_PTR(-ENOMEM);
634 }
635
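/*
 * Backref resolution callback: map (root, inode, offset) back to file paths
 * and print one warning per path that references the corrupted extent.
 */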
636 static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
637 void *warn_ctx)
638 {
639 u64 isize;
640 u32 nlink;
641 int ret;
642 int i;
643 unsigned nofs_flag;
644 struct extent_buffer *eb;
645 struct btrfs_inode_item *inode_item;
646 struct scrub_warning *swarn = warn_ctx;
647 struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
648 struct inode_fs_paths *ipath = NULL;
649 struct btrfs_root *local_root;
650 struct btrfs_key root_key;
651 struct btrfs_key key;
652
653 root_key.objectid = root;
654 root_key.type = BTRFS_ROOT_ITEM_KEY;
655 root_key.offset = (u64)-1;
656 local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
657 if (IS_ERR(local_root)) {
658 ret = PTR_ERR(local_root);
659 goto err;
660 }
661
662
663
664
665 key.objectid = inum;
666 key.type = BTRFS_INODE_ITEM_KEY;
667 key.offset = 0;
668
669 ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
670 if (ret) {
671 btrfs_release_path(swarn->path);
672 goto err;
673 }
674
675 eb = swarn->path->nodes[0];
676 inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
677 struct btrfs_inode_item);
678 isize = btrfs_inode_size(eb, inode_item);
679 nlink = btrfs_inode_nlink(eb, inode_item);
680 btrfs_release_path(swarn->path);
681
682
683
684
685
686
687 nofs_flag = memalloc_nofs_save();
688 ipath = init_ipath(4096, local_root, swarn->path);
689 memalloc_nofs_restore(nofs_flag);
690 if (IS_ERR(ipath)) {
691 ret = PTR_ERR(ipath);
692 ipath = NULL;
693 goto err;
694 }
695 ret = paths_from_inode(inum, ipath);
696
697 if (ret < 0)
698 goto err;
699
700
701
702
703
704 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
705 btrfs_warn_in_rcu(fs_info,
706 "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
707 swarn->errstr, swarn->logical,
708 rcu_str_deref(swarn->dev->name),
709 swarn->physical,
710 root, inum, offset,
711 min(isize - offset, (u64)PAGE_SIZE), nlink,
712 (char *)(unsigned long)ipath->fspath->val[i]);
713
714 free_ipath(ipath);
715 return 0;
716
717 err:
718 btrfs_warn_in_rcu(fs_info,
719 "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
720 swarn->errstr, swarn->logical,
721 rcu_str_deref(swarn->dev->name),
722 swarn->physical,
723 root, inum, offset, ret);
724
725 free_ipath(ipath);
726 return 0;
727 }
728
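/*
 * Print a warning for a corrupted block.  For tree blocks the owning tree
 * and level are reported; for data extents every referencing inode/path is
 * resolved through the backref code and reported individually.
 */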
729 static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
730 {
731 struct btrfs_device *dev;
732 struct btrfs_fs_info *fs_info;
733 struct btrfs_path *path;
734 struct btrfs_key found_key;
735 struct extent_buffer *eb;
736 struct btrfs_extent_item *ei;
737 struct scrub_warning swarn;
738 unsigned long ptr = 0;
739 u64 extent_item_pos;
740 u64 flags = 0;
741 u64 ref_root;
742 u32 item_size;
743 u8 ref_level = 0;
744 int ret;
745
746 WARN_ON(sblock->page_count < 1);
747 dev = sblock->pagev[0]->dev;
748 fs_info = sblock->sctx->fs_info;
749
750 path = btrfs_alloc_path();
751 if (!path)
752 return;
753
754 swarn.physical = sblock->pagev[0]->physical;
755 swarn.logical = sblock->pagev[0]->logical;
756 swarn.errstr = errstr;
757 swarn.dev = NULL;
758
759 ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
760 &flags);
761 if (ret < 0)
762 goto out;
763
764 extent_item_pos = swarn.logical - found_key.objectid;
765 swarn.extent_item_size = found_key.offset;
766
767 eb = path->nodes[0];
768 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
769 item_size = btrfs_item_size_nr(eb, path->slots[0]);
770
771 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
772 do {
773 ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
774 item_size, &ref_root,
775 &ref_level);
776 btrfs_warn_in_rcu(fs_info,
777 "%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
778 errstr, swarn.logical,
779 rcu_str_deref(dev->name),
780 swarn.physical,
781 ref_level ? "node" : "leaf",
782 ret < 0 ? -1 : ref_level,
783 ret < 0 ? -1 : ref_root);
784 } while (ret != 1);
785 btrfs_release_path(path);
786 } else {
787 btrfs_release_path(path);
788 swarn.path = path;
789 swarn.dev = dev;
790 iterate_extent_inodes(fs_info, found_key.objectid,
791 extent_item_pos, 1,
792 scrub_print_warning_inode, &swarn, false);
793 }
794
795 out:
796 btrfs_free_path(path);
797 }
798
799 static inline void scrub_get_recover(struct scrub_recover *recover)
800 {
801 refcount_inc(&recover->refs);
802 }
803
804 static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
805 struct scrub_recover *recover)
806 {
807 if (refcount_dec_and_test(&recover->refs)) {
808 btrfs_bio_counter_dec(fs_info);
809 btrfs_put_bbio(recover->bbio);
810 kfree(recover);
811 }
812 }
813
814 /*
815  * scrub_handle_errored_block() gets called when either verification of the
816  * pages failed or the bio failed to read, e.g. with EIO.  In the latter
817  * case this function handles all pages in the bio, even though only one
818  * may be bad.
819  * The goal of this function is to repair the errored block by using the
820  * contents of one of the mirrors.
821  */
822 static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
823 {
824 struct scrub_ctx *sctx = sblock_to_check->sctx;
825 struct btrfs_device *dev;
826 struct btrfs_fs_info *fs_info;
827 u64 logical;
828 unsigned int failed_mirror_index;
829 unsigned int is_metadata;
830 unsigned int have_csum;
831 struct scrub_block *sblocks_for_recheck;
832 struct scrub_block *sblock_bad;
833 int ret;
834 int mirror_index;
835 int page_num;
836 int success;
837 bool full_stripe_locked;
838 unsigned int nofs_flag;
839 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
840 DEFAULT_RATELIMIT_BURST);
841
842 BUG_ON(sblock_to_check->page_count < 1);
843 fs_info = sctx->fs_info;
844 if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
845
846
847
848
849
850 spin_lock(&sctx->stat_lock);
851 ++sctx->stat.super_errors;
852 spin_unlock(&sctx->stat_lock);
853 return 0;
854 }
855 logical = sblock_to_check->pagev[0]->logical;
856 BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
857 failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
858 is_metadata = !(sblock_to_check->pagev[0]->flags &
859 BTRFS_EXTENT_FLAG_DATA);
860 have_csum = sblock_to_check->pagev[0]->have_csum;
861 dev = sblock_to_check->pagev[0]->dev;
862
863
864
865
866
867
868
869
870
871
872 nofs_flag = memalloc_nofs_save();
873
874
875
876
877
878
879
880 ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
881 if (ret < 0) {
882 memalloc_nofs_restore(nofs_flag);
883 spin_lock(&sctx->stat_lock);
884 if (ret == -ENOMEM)
885 sctx->stat.malloc_errors++;
886 sctx->stat.read_errors++;
887 sctx->stat.uncorrectable_errors++;
888 spin_unlock(&sctx->stat_lock);
889 return ret;
890 }
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921 sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
922 sizeof(*sblocks_for_recheck), GFP_KERNEL);
923 if (!sblocks_for_recheck) {
924 spin_lock(&sctx->stat_lock);
925 sctx->stat.malloc_errors++;
926 sctx->stat.read_errors++;
927 sctx->stat.uncorrectable_errors++;
928 spin_unlock(&sctx->stat_lock);
929 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
930 goto out;
931 }
932
933
934 ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
935 if (ret) {
936 spin_lock(&sctx->stat_lock);
937 sctx->stat.read_errors++;
938 sctx->stat.uncorrectable_errors++;
939 spin_unlock(&sctx->stat_lock);
940 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
941 goto out;
942 }
943 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
944 sblock_bad = sblocks_for_recheck + failed_mirror_index;
945
946
947 scrub_recheck_block(fs_info, sblock_bad, 1);
948
949 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
950 sblock_bad->no_io_error_seen) {
951
952
953
954
955
956
957
958
959 spin_lock(&sctx->stat_lock);
960 sctx->stat.unverified_errors++;
961 sblock_to_check->data_corrected = 1;
962 spin_unlock(&sctx->stat_lock);
963
964 if (sctx->is_dev_replace)
965 scrub_write_block_to_dev_replace(sblock_bad);
966 goto out;
967 }
968
969 if (!sblock_bad->no_io_error_seen) {
970 spin_lock(&sctx->stat_lock);
971 sctx->stat.read_errors++;
972 spin_unlock(&sctx->stat_lock);
973 if (__ratelimit(&_rs))
974 scrub_print_warning("i/o error", sblock_to_check);
975 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
976 } else if (sblock_bad->checksum_error) {
977 spin_lock(&sctx->stat_lock);
978 sctx->stat.csum_errors++;
979 spin_unlock(&sctx->stat_lock);
980 if (__ratelimit(&_rs))
981 scrub_print_warning("checksum error", sblock_to_check);
982 btrfs_dev_stat_inc_and_print(dev,
983 BTRFS_DEV_STAT_CORRUPTION_ERRS);
984 } else if (sblock_bad->header_error) {
985 spin_lock(&sctx->stat_lock);
986 sctx->stat.verify_errors++;
987 spin_unlock(&sctx->stat_lock);
988 if (__ratelimit(&_rs))
989 scrub_print_warning("checksum/header error",
990 sblock_to_check);
991 if (sblock_bad->generation_error)
992 btrfs_dev_stat_inc_and_print(dev,
993 BTRFS_DEV_STAT_GENERATION_ERRS);
994 else
995 btrfs_dev_stat_inc_and_print(dev,
996 BTRFS_DEV_STAT_CORRUPTION_ERRS);
997 }
998
999 if (sctx->readonly) {
1000 ASSERT(!sctx->is_dev_replace);
1001 goto out;
1002 }
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019 for (mirror_index = 0; ;mirror_index++) {
1020 struct scrub_block *sblock_other;
1021
1022 if (mirror_index == failed_mirror_index)
1023 continue;
1024
1025
1026 if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
1027 if (mirror_index >= BTRFS_MAX_MIRRORS)
1028 break;
1029 if (!sblocks_for_recheck[mirror_index].page_count)
1030 break;
1031
1032 sblock_other = sblocks_for_recheck + mirror_index;
1033 } else {
1034 struct scrub_recover *r = sblock_bad->pagev[0]->recover;
1035 int max_allowed = r->bbio->num_stripes -
1036 r->bbio->num_tgtdevs;
1037
1038 if (mirror_index >= max_allowed)
1039 break;
1040 if (!sblocks_for_recheck[1].page_count)
1041 break;
1042
1043 ASSERT(failed_mirror_index == 0);
1044 sblock_other = sblocks_for_recheck + 1;
1045 sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
1046 }
1047
1048
1049 scrub_recheck_block(fs_info, sblock_other, 0);
1050
1051 if (!sblock_other->header_error &&
1052 !sblock_other->checksum_error &&
1053 sblock_other->no_io_error_seen) {
1054 if (sctx->is_dev_replace) {
1055 scrub_write_block_to_dev_replace(sblock_other);
1056 goto corrected_error;
1057 } else {
1058 ret = scrub_repair_block_from_good_copy(
1059 sblock_bad, sblock_other);
1060 if (!ret)
1061 goto corrected_error;
1062 }
1063 }
1064 }
1065
1066 if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
1067 goto did_not_correct_error;
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093 success = 1;
1094 for (page_num = 0; page_num < sblock_bad->page_count;
1095 page_num++) {
1096 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1097 struct scrub_block *sblock_other = NULL;
1098
1099
1100 if (!page_bad->io_error && !sctx->is_dev_replace)
1101 continue;
1102
1103 if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
1104
1105
1106
1107
1108
1109
1110
1111 sblock_other = NULL;
1112 } else if (page_bad->io_error) {
1113
1114 for (mirror_index = 0;
1115 mirror_index < BTRFS_MAX_MIRRORS &&
1116 sblocks_for_recheck[mirror_index].page_count > 0;
1117 mirror_index++) {
1118 if (!sblocks_for_recheck[mirror_index].
1119 pagev[page_num]->io_error) {
1120 sblock_other = sblocks_for_recheck +
1121 mirror_index;
1122 break;
1123 }
1124 }
1125 if (!sblock_other)
1126 success = 0;
1127 }
1128
1129 if (sctx->is_dev_replace) {
1130
1131
1132
1133
1134
1135
1136
1137 if (!sblock_other)
1138 sblock_other = sblock_bad;
1139
1140 if (scrub_write_page_to_dev_replace(sblock_other,
1141 page_num) != 0) {
1142 atomic64_inc(
1143 &fs_info->dev_replace.num_write_errors);
1144 success = 0;
1145 }
1146 } else if (sblock_other) {
1147 ret = scrub_repair_page_from_good_copy(sblock_bad,
1148 sblock_other,
1149 page_num, 0);
1150 if (0 == ret)
1151 page_bad->io_error = 0;
1152 else
1153 success = 0;
1154 }
1155 }
1156
1157 if (success && !sctx->is_dev_replace) {
1158 if (is_metadata || have_csum) {
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168 scrub_recheck_block(fs_info, sblock_bad, 1);
1169 if (!sblock_bad->header_error &&
1170 !sblock_bad->checksum_error &&
1171 sblock_bad->no_io_error_seen)
1172 goto corrected_error;
1173 else
1174 goto did_not_correct_error;
1175 } else {
1176 corrected_error:
1177 spin_lock(&sctx->stat_lock);
1178 sctx->stat.corrected_errors++;
1179 sblock_to_check->data_corrected = 1;
1180 spin_unlock(&sctx->stat_lock);
1181 btrfs_err_rl_in_rcu(fs_info,
1182 "fixed up error at logical %llu on dev %s",
1183 logical, rcu_str_deref(dev->name));
1184 }
1185 } else {
1186 did_not_correct_error:
1187 spin_lock(&sctx->stat_lock);
1188 sctx->stat.uncorrectable_errors++;
1189 spin_unlock(&sctx->stat_lock);
1190 btrfs_err_rl_in_rcu(fs_info,
1191 "unable to fixup (regular) error at logical %llu on dev %s",
1192 logical, rcu_str_deref(dev->name));
1193 }
1194
1195 out:
1196 if (sblocks_for_recheck) {
1197 for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
1198 mirror_index++) {
1199 struct scrub_block *sblock = sblocks_for_recheck +
1200 mirror_index;
1201 struct scrub_recover *recover;
1202 int page_index;
1203
1204 for (page_index = 0; page_index < sblock->page_count;
1205 page_index++) {
1206 sblock->pagev[page_index]->sblock = NULL;
1207 recover = sblock->pagev[page_index]->recover;
1208 if (recover) {
1209 scrub_put_recover(fs_info, recover);
1210 sblock->pagev[page_index]->recover =
1211 NULL;
1212 }
1213 scrub_page_put(sblock->pagev[page_index]);
1214 }
1215 }
1216 kfree(sblocks_for_recheck);
1217 }
1218
1219 ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
1220 memalloc_nofs_restore(nofs_flag);
1221 if (ret < 0)
1222 return ret;
1223 return 0;
1224 }
1225
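/*
 * Number of "mirrors" that can be tried on a recheck: 2 for RAID5 (the data
 * itself plus one reconstruction from parity), 3 for RAID6, otherwise the
 * number of stripes returned by the mapping code.
 */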
1226 static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
1227 {
1228 if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
1229 return 2;
1230 else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
1231 return 3;
1232 else
1233 return (int)bbio->num_stripes;
1234 }
1235
1236 static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
1237 u64 *raid_map,
1238 u64 mapped_length,
1239 int nstripes, int mirror,
1240 int *stripe_index,
1241 u64 *stripe_offset)
1242 {
1243 int i;
1244
1245 if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
1246
1247 for (i = 0; i < nstripes; i++) {
1248 if (raid_map[i] == RAID6_Q_STRIPE ||
1249 raid_map[i] == RAID5_P_STRIPE)
1250 continue;
1251
1252 if (logical >= raid_map[i] &&
1253 logical < raid_map[i] + mapped_length)
1254 break;
1255 }
1256
1257 *stripe_index = i;
1258 *stripe_offset = logical - raid_map[i];
1259 } else {
1260
1261 *stripe_index = mirror;
1262 *stripe_offset = 0;
1263 }
1264 }
1265
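/*
 * Build one scrub_block per available mirror for the errored block, page by
 * page, so that every copy can later be re-read and verified independently.
 */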
1266 static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
1267 struct scrub_block *sblocks_for_recheck)
1268 {
1269 struct scrub_ctx *sctx = original_sblock->sctx;
1270 struct btrfs_fs_info *fs_info = sctx->fs_info;
1271 u64 length = original_sblock->page_count * PAGE_SIZE;
1272 u64 logical = original_sblock->pagev[0]->logical;
1273 u64 generation = original_sblock->pagev[0]->generation;
1274 u64 flags = original_sblock->pagev[0]->flags;
1275 u64 have_csum = original_sblock->pagev[0]->have_csum;
1276 struct scrub_recover *recover;
1277 struct btrfs_bio *bbio;
1278 u64 sublen;
1279 u64 mapped_length;
1280 u64 stripe_offset;
1281 int stripe_index;
1282 int page_index = 0;
1283 int mirror_index;
1284 int nmirrors;
1285 int ret;
1286
1287
1288
1289
1290
1291
1292
1293 while (length > 0) {
1294 sublen = min_t(u64, length, PAGE_SIZE);
1295 mapped_length = sublen;
1296 bbio = NULL;
1297
1298
1299
1300
1301
1302 btrfs_bio_counter_inc_blocked(fs_info);
1303 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
1304 logical, &mapped_length, &bbio);
1305 if (ret || !bbio || mapped_length < sublen) {
1306 btrfs_put_bbio(bbio);
1307 btrfs_bio_counter_dec(fs_info);
1308 return -EIO;
1309 }
1310
1311 recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
1312 if (!recover) {
1313 btrfs_put_bbio(bbio);
1314 btrfs_bio_counter_dec(fs_info);
1315 return -ENOMEM;
1316 }
1317
1318 refcount_set(&recover->refs, 1);
1319 recover->bbio = bbio;
1320 recover->map_length = mapped_length;
1321
1322 BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
1323
1324 nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
1325
1326 for (mirror_index = 0; mirror_index < nmirrors;
1327 mirror_index++) {
1328 struct scrub_block *sblock;
1329 struct scrub_page *page;
1330
1331 sblock = sblocks_for_recheck + mirror_index;
1332 sblock->sctx = sctx;
1333
1334 page = kzalloc(sizeof(*page), GFP_NOFS);
1335 if (!page) {
1336 leave_nomem:
1337 spin_lock(&sctx->stat_lock);
1338 sctx->stat.malloc_errors++;
1339 spin_unlock(&sctx->stat_lock);
1340 scrub_put_recover(fs_info, recover);
1341 return -ENOMEM;
1342 }
1343 scrub_page_get(page);
1344 sblock->pagev[page_index] = page;
1345 page->sblock = sblock;
1346 page->flags = flags;
1347 page->generation = generation;
1348 page->logical = logical;
1349 page->have_csum = have_csum;
1350 if (have_csum)
1351 memcpy(page->csum,
1352 original_sblock->pagev[0]->csum,
1353 sctx->csum_size);
1354
1355 scrub_stripe_index_and_offset(logical,
1356 bbio->map_type,
1357 bbio->raid_map,
1358 mapped_length,
1359 bbio->num_stripes -
1360 bbio->num_tgtdevs,
1361 mirror_index,
1362 &stripe_index,
1363 &stripe_offset);
1364 page->physical = bbio->stripes[stripe_index].physical +
1365 stripe_offset;
1366 page->dev = bbio->stripes[stripe_index].dev;
1367
1368 BUG_ON(page_index >= original_sblock->page_count);
1369 page->physical_for_dev_replace =
1370 original_sblock->pagev[page_index]->
1371 physical_for_dev_replace;
1372
1373 page->mirror_num = mirror_index + 1;
1374 sblock->page_count++;
1375 page->page = alloc_page(GFP_NOFS);
1376 if (!page->page)
1377 goto leave_nomem;
1378
1379 scrub_get_recover(recover);
1380 page->recover = recover;
1381 }
1382 scrub_put_recover(fs_info, recover);
1383 length -= sublen;
1384 logical += sublen;
1385 page_index++;
1386 }
1387
1388 return 0;
1389 }
1390
1391 static void scrub_bio_wait_endio(struct bio *bio)
1392 {
1393 complete(bio->bi_private);
1394 }
1395
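/*
 * Submit @bio through the RAID56 recovery code and wait for it; the pages
 * are rebuilt from the remaining stripes of the full stripe.
 */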
1396 static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1397 struct bio *bio,
1398 struct scrub_page *page)
1399 {
1400 DECLARE_COMPLETION_ONSTACK(done);
1401 int ret;
1402 int mirror_num;
1403
1404 bio->bi_iter.bi_sector = page->logical >> 9;
1405 bio->bi_private = &done;
1406 bio->bi_end_io = scrub_bio_wait_endio;
1407
1408 mirror_num = page->sblock->pagev[0]->mirror_num;
1409 ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
1410 page->recover->map_length,
1411 mirror_num, 0);
1412 if (ret)
1413 return ret;
1414
1415 wait_for_completion_io(&done);
1416 return blk_status_to_errno(bio->bi_status);
1417 }
1418
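/*
 * Re-read a block that sits on RAID5/6 via the parity rebuild path.  All
 * pages of the block are read in one go; on failure every page is marked
 * with an I/O error.
 */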
1419 static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
1420 struct scrub_block *sblock)
1421 {
1422 struct scrub_page *first_page = sblock->pagev[0];
1423 struct bio *bio;
1424 int page_num;
1425
1426
1427 ASSERT(first_page->dev);
1428 if (!first_page->dev->bdev)
1429 goto out;
1430
1431 bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
1432 bio_set_dev(bio, first_page->dev->bdev);
1433
1434 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1435 struct scrub_page *page = sblock->pagev[page_num];
1436
1437 WARN_ON(!page->page);
1438 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1439 }
1440
1441 if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
1442 bio_put(bio);
1443 goto out;
1444 }
1445
1446 bio_put(bio);
1447
1448 scrub_recheck_block_checksum(sblock);
1449
1450 return;
1451 out:
1452 for (page_num = 0; page_num < sblock->page_count; page_num++)
1453 sblock->pagev[page_num]->io_error = 1;
1454
1455 sblock->no_io_error_seen = 0;
1456 }
1457
1458 /*
1459  * Re-read all pages of the given block and check them for I/O errors,
1460  * checksum errors and header errors.  Pages that fail to read are marked
1461  * with io_error so that later repair knows exactly which pages are bad.
1462  * If @retry_failed_mirror is not set, RAID56 blocks are re-read through
1463  * the parity rebuild path instead of directly from the device.
1464  */
1465 static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1466 struct scrub_block *sblock,
1467 int retry_failed_mirror)
1468 {
1469 int page_num;
1470
1471 sblock->no_io_error_seen = 1;
1472
1473
1474 if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
1475 return scrub_recheck_block_on_raid56(fs_info, sblock);
1476
1477 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1478 struct bio *bio;
1479 struct scrub_page *page = sblock->pagev[page_num];
1480
1481 if (page->dev->bdev == NULL) {
1482 page->io_error = 1;
1483 sblock->no_io_error_seen = 0;
1484 continue;
1485 }
1486
1487 WARN_ON(!page->page);
1488 bio = btrfs_io_bio_alloc(1);
1489 bio_set_dev(bio, page->dev->bdev);
1490
1491 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1492 bio->bi_iter.bi_sector = page->physical >> 9;
1493 bio->bi_opf = REQ_OP_READ;
1494
1495 if (btrfsic_submit_bio_wait(bio)) {
1496 page->io_error = 1;
1497 sblock->no_io_error_seen = 0;
1498 }
1499
1500 bio_put(bio);
1501 }
1502
1503 if (sblock->no_io_error_seen)
1504 scrub_recheck_block_checksum(sblock);
1505 }
1506
1507 static inline int scrub_check_fsid(u8 fsid[],
1508 struct scrub_page *spage)
1509 {
1510 struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
1511 int ret;
1512
1513 ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1514 return !ret;
1515 }
1516
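/* Re-verify the checksum (and, for tree blocks, the header) of a re-read block. */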
1517 static void scrub_recheck_block_checksum(struct scrub_block *sblock)
1518 {
1519 sblock->header_error = 0;
1520 sblock->checksum_error = 0;
1521 sblock->generation_error = 0;
1522
1523 if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
1524 scrub_checksum_data(sblock);
1525 else
1526 scrub_checksum_tree_block(sblock);
1527 }
1528
1529 static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1530 struct scrub_block *sblock_good)
1531 {
1532 int page_num;
1533 int ret = 0;
1534
1535 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1536 int ret_sub;
1537
1538 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
1539 sblock_good,
1540 page_num, 1);
1541 if (ret_sub)
1542 ret = ret_sub;
1543 }
1544
1545 return ret;
1546 }
1547
1548 static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1549 struct scrub_block *sblock_good,
1550 int page_num, int force_write)
1551 {
1552 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1553 struct scrub_page *page_good = sblock_good->pagev[page_num];
1554 struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;
1555
1556 BUG_ON(page_bad->page == NULL);
1557 BUG_ON(page_good->page == NULL);
1558 if (force_write || sblock_bad->header_error ||
1559 sblock_bad->checksum_error || page_bad->io_error) {
1560 struct bio *bio;
1561 int ret;
1562
1563 if (!page_bad->dev->bdev) {
1564 btrfs_warn_rl(fs_info,
1565 "scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
1566 return -EIO;
1567 }
1568
1569 bio = btrfs_io_bio_alloc(1);
1570 bio_set_dev(bio, page_bad->dev->bdev);
1571 bio->bi_iter.bi_sector = page_bad->physical >> 9;
1572 bio->bi_opf = REQ_OP_WRITE;
1573
1574 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1575 if (PAGE_SIZE != ret) {
1576 bio_put(bio);
1577 return -EIO;
1578 }
1579
1580 if (btrfsic_submit_bio_wait(bio)) {
1581 btrfs_dev_stat_inc_and_print(page_bad->dev,
1582 BTRFS_DEV_STAT_WRITE_ERRS);
1583 atomic64_inc(&fs_info->dev_replace.num_write_errors);
1584 bio_put(bio);
1585 return -EIO;
1586 }
1587 bio_put(bio);
1588 }
1589
1590 return 0;
1591 }
1592
1593 static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
1594 {
1595 struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
1596 int page_num;
1597
1598
1599
1600
1601
1602 if (sblock->sparity)
1603 return;
1604
1605 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1606 int ret;
1607
1608 ret = scrub_write_page_to_dev_replace(sblock, page_num);
1609 if (ret)
1610 atomic64_inc(&fs_info->dev_replace.num_write_errors);
1611 }
1612 }
1613
1614 static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
1615 int page_num)
1616 {
1617 struct scrub_page *spage = sblock->pagev[page_num];
1618
1619 BUG_ON(spage->page == NULL);
1620 if (spage->io_error) {
1621 void *mapped_buffer = kmap_atomic(spage->page);
1622
1623 clear_page(mapped_buffer);
1624 flush_dcache_page(spage->page);
1625 kunmap_atomic(mapped_buffer);
1626 }
1627 return scrub_add_page_to_wr_bio(sblock->sctx, spage);
1628 }
1629
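/*
 * Queue one page for writing to the dev-replace target.  Pages are batched
 * into the current write bio while they stay physically and logically
 * contiguous; otherwise the bio is submitted and a new one is started.
 */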
1630 static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
1631 struct scrub_page *spage)
1632 {
1633 struct scrub_bio *sbio;
1634 int ret;
1635
1636 mutex_lock(&sctx->wr_lock);
1637 again:
1638 if (!sctx->wr_curr_bio) {
1639 sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
1640 GFP_KERNEL);
1641 if (!sctx->wr_curr_bio) {
1642 mutex_unlock(&sctx->wr_lock);
1643 return -ENOMEM;
1644 }
1645 sctx->wr_curr_bio->sctx = sctx;
1646 sctx->wr_curr_bio->page_count = 0;
1647 }
1648 sbio = sctx->wr_curr_bio;
1649 if (sbio->page_count == 0) {
1650 struct bio *bio;
1651
1652 sbio->physical = spage->physical_for_dev_replace;
1653 sbio->logical = spage->logical;
1654 sbio->dev = sctx->wr_tgtdev;
1655 bio = sbio->bio;
1656 if (!bio) {
1657 bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
1658 sbio->bio = bio;
1659 }
1660
1661 bio->bi_private = sbio;
1662 bio->bi_end_io = scrub_wr_bio_end_io;
1663 bio_set_dev(bio, sbio->dev->bdev);
1664 bio->bi_iter.bi_sector = sbio->physical >> 9;
1665 bio->bi_opf = REQ_OP_WRITE;
1666 sbio->status = 0;
1667 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1668 spage->physical_for_dev_replace ||
1669 sbio->logical + sbio->page_count * PAGE_SIZE !=
1670 spage->logical) {
1671 scrub_wr_submit(sctx);
1672 goto again;
1673 }
1674
1675 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1676 if (ret != PAGE_SIZE) {
1677 if (sbio->page_count < 1) {
1678 bio_put(sbio->bio);
1679 sbio->bio = NULL;
1680 mutex_unlock(&sctx->wr_lock);
1681 return -EIO;
1682 }
1683 scrub_wr_submit(sctx);
1684 goto again;
1685 }
1686
1687 sbio->pagev[sbio->page_count] = spage;
1688 scrub_page_get(spage);
1689 sbio->page_count++;
1690 if (sbio->page_count == sctx->pages_per_wr_bio)
1691 scrub_wr_submit(sctx);
1692 mutex_unlock(&sctx->wr_lock);
1693
1694 return 0;
1695 }
1696
1697 static void scrub_wr_submit(struct scrub_ctx *sctx)
1698 {
1699 struct scrub_bio *sbio;
1700
1701 if (!sctx->wr_curr_bio)
1702 return;
1703
1704 sbio = sctx->wr_curr_bio;
1705 sctx->wr_curr_bio = NULL;
1706 WARN_ON(!sbio->bio->bi_disk);
1707 scrub_pending_bio_inc(sctx);
1708
1709
1710
1711
1712 btrfsic_submit_bio(sbio->bio);
1713 }
1714
1715 static void scrub_wr_bio_end_io(struct bio *bio)
1716 {
1717 struct scrub_bio *sbio = bio->bi_private;
1718 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
1719
1720 sbio->status = bio->bi_status;
1721 sbio->bio = bio;
1722
1723 btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
1724 btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
1725 }
1726
1727 static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
1728 {
1729 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
1730 struct scrub_ctx *sctx = sbio->sctx;
1731 int i;
1732
1733 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
1734 if (sbio->status) {
1735 struct btrfs_dev_replace *dev_replace =
1736 &sbio->sctx->fs_info->dev_replace;
1737
1738 for (i = 0; i < sbio->page_count; i++) {
1739 struct scrub_page *spage = sbio->pagev[i];
1740
1741 spage->io_error = 1;
1742 atomic64_inc(&dev_replace->num_write_errors);
1743 }
1744 }
1745
1746 for (i = 0; i < sbio->page_count; i++)
1747 scrub_page_put(sbio->pagev[i]);
1748
1749 bio_put(sbio->bio);
1750 kfree(sbio);
1751 scrub_pending_bio_dec(sctx);
1752 }
1753
1754 static int scrub_checksum(struct scrub_block *sblock)
1755 {
1756 u64 flags;
1757 int ret;
1758
1759 /*
1760  * Reset the error flags and re-verify the block from scratch: depending
1761  * on the extent flags it is checked as a data extent, a tree block or a
1762  * super block.  A non-zero return value means the block is bad and
1763  * triggers repair through scrub_handle_errored_block().
1764  *
1765  * Super block errors are only counted, never repaired here.
1766  */
1767 sblock->header_error = 0;
1768 sblock->generation_error = 0;
1769 sblock->checksum_error = 0;
1770
1771 WARN_ON(sblock->page_count < 1);
1772 flags = sblock->pagev[0]->flags;
1773 ret = 0;
1774 if (flags & BTRFS_EXTENT_FLAG_DATA)
1775 ret = scrub_checksum_data(sblock);
1776 else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1777 ret = scrub_checksum_tree_block(sblock);
1778 else if (flags & BTRFS_EXTENT_FLAG_SUPER)
1779 (void)scrub_checksum_super(sblock);
1780 else
1781 WARN_ON(1);
1782 if (ret)
1783 scrub_handle_errored_block(sblock);
1784
1785 return ret;
1786 }
1787
1788 static int scrub_checksum_data(struct scrub_block *sblock)
1789 {
1790 struct scrub_ctx *sctx = sblock->sctx;
1791 struct btrfs_fs_info *fs_info = sctx->fs_info;
1792 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1793 u8 csum[BTRFS_CSUM_SIZE];
1794 u8 *on_disk_csum;
1795 struct page *page;
1796 void *buffer;
1797 u64 len;
1798 int index;
1799
1800 BUG_ON(sblock->page_count < 1);
1801 if (!sblock->pagev[0]->have_csum)
1802 return 0;
1803
1804 shash->tfm = fs_info->csum_shash;
1805 crypto_shash_init(shash);
1806
1807 on_disk_csum = sblock->pagev[0]->csum;
1808 page = sblock->pagev[0]->page;
1809 buffer = kmap_atomic(page);
1810
1811 len = sctx->fs_info->sectorsize;
1812 index = 0;
1813 for (;;) {
1814 u64 l = min_t(u64, len, PAGE_SIZE);
1815
1816 crypto_shash_update(shash, buffer, l);
1817 kunmap_atomic(buffer);
1818 len -= l;
1819 if (len == 0)
1820 break;
1821 index++;
1822 BUG_ON(index >= sblock->page_count);
1823 BUG_ON(!sblock->pagev[index]->page);
1824 page = sblock->pagev[index]->page;
1825 buffer = kmap_atomic(page);
1826 }
1827
1828 crypto_shash_final(shash, csum);
1829 if (memcmp(csum, on_disk_csum, sctx->csum_size))
1830 sblock->checksum_error = 1;
1831
1832 return sblock->checksum_error;
1833 }
1834
1835 static int scrub_checksum_tree_block(struct scrub_block *sblock)
1836 {
1837 struct scrub_ctx *sctx = sblock->sctx;
1838 struct btrfs_header *h;
1839 struct btrfs_fs_info *fs_info = sctx->fs_info;
1840 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1841 u8 calculated_csum[BTRFS_CSUM_SIZE];
1842 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1843 struct page *page;
1844 void *mapped_buffer;
1845 u64 mapped_size;
1846 void *p;
1847 u64 len;
1848 int index;
1849
1850 shash->tfm = fs_info->csum_shash;
1851 crypto_shash_init(shash);
1852
1853 BUG_ON(sblock->page_count < 1);
1854 page = sblock->pagev[0]->page;
1855 mapped_buffer = kmap_atomic(page);
1856 h = (struct btrfs_header *)mapped_buffer;
1857 memcpy(on_disk_csum, h->csum, sctx->csum_size);
1858
1859
1860
1861
1862
1863
1864 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
1865 sblock->header_error = 1;
1866
1867 if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) {
1868 sblock->header_error = 1;
1869 sblock->generation_error = 1;
1870 }
1871
1872 if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
1873 sblock->header_error = 1;
1874
1875 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1876 BTRFS_UUID_SIZE))
1877 sblock->header_error = 1;
1878
1879 len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE;
1880 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1881 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1882 index = 0;
1883 for (;;) {
1884 u64 l = min_t(u64, len, mapped_size);
1885
1886 crypto_shash_update(shash, p, l);
1887 kunmap_atomic(mapped_buffer);
1888 len -= l;
1889 if (len == 0)
1890 break;
1891 index++;
1892 BUG_ON(index >= sblock->page_count);
1893 BUG_ON(!sblock->pagev[index]->page);
1894 page = sblock->pagev[index]->page;
1895 mapped_buffer = kmap_atomic(page);
1896 mapped_size = PAGE_SIZE;
1897 p = mapped_buffer;
1898 }
1899
1900 crypto_shash_final(shash, calculated_csum);
1901 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1902 sblock->checksum_error = 1;
1903
1904 return sblock->header_error || sblock->checksum_error;
1905 }
1906
1907 static int scrub_checksum_super(struct scrub_block *sblock)
1908 {
1909 struct btrfs_super_block *s;
1910 struct scrub_ctx *sctx = sblock->sctx;
1911 struct btrfs_fs_info *fs_info = sctx->fs_info;
1912 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1913 u8 calculated_csum[BTRFS_CSUM_SIZE];
1914 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1915 struct page *page;
1916 void *mapped_buffer;
1917 u64 mapped_size;
1918 void *p;
1919 int fail_gen = 0;
1920 int fail_cor = 0;
1921 u64 len;
1922 int index;
1923
1924 shash->tfm = fs_info->csum_shash;
1925 crypto_shash_init(shash);
1926
1927 BUG_ON(sblock->page_count < 1);
1928 page = sblock->pagev[0]->page;
1929 mapped_buffer = kmap_atomic(page);
1930 s = (struct btrfs_super_block *)mapped_buffer;
1931 memcpy(on_disk_csum, s->csum, sctx->csum_size);
1932
1933 if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
1934 ++fail_cor;
1935
1936 if (sblock->pagev[0]->generation != btrfs_super_generation(s))
1937 ++fail_gen;
1938
1939 if (!scrub_check_fsid(s->fsid, sblock->pagev[0]))
1940 ++fail_cor;
1941
1942 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
1943 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1944 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1945 index = 0;
1946 for (;;) {
1947 u64 l = min_t(u64, len, mapped_size);
1948
1949 crypto_shash_update(shash, p, l);
1950 kunmap_atomic(mapped_buffer);
1951 len -= l;
1952 if (len == 0)
1953 break;
1954 index++;
1955 BUG_ON(index >= sblock->page_count);
1956 BUG_ON(!sblock->pagev[index]->page);
1957 page = sblock->pagev[index]->page;
1958 mapped_buffer = kmap_atomic(page);
1959 mapped_size = PAGE_SIZE;
1960 p = mapped_buffer;
1961 }
1962
1963 crypto_shash_final(shash, calculated_csum);
1964 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1965 ++fail_cor;
1966
1967 if (fail_cor + fail_gen) {
1968
1969
1970
1971
1972
1973 spin_lock(&sctx->stat_lock);
1974 ++sctx->stat.super_errors;
1975 spin_unlock(&sctx->stat_lock);
1976 if (fail_cor)
1977 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1978 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1979 else
1980 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1981 BTRFS_DEV_STAT_GENERATION_ERRS);
1982 }
1983
1984 return fail_cor + fail_gen;
1985 }
1986
1987 static void scrub_block_get(struct scrub_block *sblock)
1988 {
1989 refcount_inc(&sblock->refs);
1990 }
1991
1992 static void scrub_block_put(struct scrub_block *sblock)
1993 {
1994 if (refcount_dec_and_test(&sblock->refs)) {
1995 int i;
1996
1997 if (sblock->sparity)
1998 scrub_parity_put(sblock->sparity);
1999
2000 for (i = 0; i < sblock->page_count; i++)
2001 scrub_page_put(sblock->pagev[i]);
2002 kfree(sblock);
2003 }
2004 }
2005
2006 static void scrub_page_get(struct scrub_page *spage)
2007 {
2008 atomic_inc(&spage->refs);
2009 }
2010
2011 static void scrub_page_put(struct scrub_page *spage)
2012 {
2013 if (atomic_dec_and_test(&spage->refs)) {
2014 if (spage->page)
2015 __free_page(spage->page);
2016 kfree(spage);
2017 }
2018 }
2019
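/* Submit the currently assembled read bio, if there is one. */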
2020 static void scrub_submit(struct scrub_ctx *sctx)
2021 {
2022 struct scrub_bio *sbio;
2023
2024 if (sctx->curr == -1)
2025 return;
2026
2027 sbio = sctx->bios[sctx->curr];
2028 sctx->curr = -1;
2029 scrub_pending_bio_inc(sctx);
2030 btrfsic_submit_bio(sbio->bio);
2031 }
2032
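/*
 * Queue one page for reading: take a free scrub_bio (waiting if all of them
 * are in flight), extend it while pages remain contiguous on the same
 * device, and submit it once it is full or contiguity is broken.
 */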
2033 static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
2034 struct scrub_page *spage)
2035 {
2036 struct scrub_block *sblock = spage->sblock;
2037 struct scrub_bio *sbio;
2038 int ret;
2039
2040 again:
2041
2042
2043
2044 while (sctx->curr == -1) {
2045 spin_lock(&sctx->list_lock);
2046 sctx->curr = sctx->first_free;
2047 if (sctx->curr != -1) {
2048 sctx->first_free = sctx->bios[sctx->curr]->next_free;
2049 sctx->bios[sctx->curr]->next_free = -1;
2050 sctx->bios[sctx->curr]->page_count = 0;
2051 spin_unlock(&sctx->list_lock);
2052 } else {
2053 spin_unlock(&sctx->list_lock);
2054 wait_event(sctx->list_wait, sctx->first_free != -1);
2055 }
2056 }
2057 sbio = sctx->bios[sctx->curr];
2058 if (sbio->page_count == 0) {
2059 struct bio *bio;
2060
2061 sbio->physical = spage->physical;
2062 sbio->logical = spage->logical;
2063 sbio->dev = spage->dev;
2064 bio = sbio->bio;
2065 if (!bio) {
2066 bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
2067 sbio->bio = bio;
2068 }
2069
2070 bio->bi_private = sbio;
2071 bio->bi_end_io = scrub_bio_end_io;
2072 bio_set_dev(bio, sbio->dev->bdev);
2073 bio->bi_iter.bi_sector = sbio->physical >> 9;
2074 bio->bi_opf = REQ_OP_READ;
2075 sbio->status = 0;
2076 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
2077 spage->physical ||
2078 sbio->logical + sbio->page_count * PAGE_SIZE !=
2079 spage->logical ||
2080 sbio->dev != spage->dev) {
2081 scrub_submit(sctx);
2082 goto again;
2083 }
2084
2085 sbio->pagev[sbio->page_count] = spage;
2086 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
2087 if (ret != PAGE_SIZE) {
2088 if (sbio->page_count < 1) {
2089 bio_put(sbio->bio);
2090 sbio->bio = NULL;
2091 return -EIO;
2092 }
2093 scrub_submit(sctx);
2094 goto again;
2095 }
2096
2097 scrub_block_get(sblock);
2098 atomic_inc(&sblock->outstanding_pages);
2099 sbio->page_count++;
2100 if (sbio->page_count == sctx->pages_per_rd_bio)
2101 scrub_submit(sctx);
2102
2103 return 0;
2104 }
2105
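/* Completion handler for the missing-device RAID56 rebuild; the block is handed to a worker for verification. */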
2106 static void scrub_missing_raid56_end_io(struct bio *bio)
2107 {
2108 struct scrub_block *sblock = bio->bi_private;
2109 struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
2110
2111 if (bio->bi_status)
2112 sblock->no_io_error_seen = 0;
2113
2114 bio_put(bio);
2115
2116 btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
2117 }
2118
2119 static void scrub_missing_raid56_worker(struct btrfs_work *work)
2120 {
2121 struct scrub_block *sblock = container_of(work, struct scrub_block, work);
2122 struct scrub_ctx *sctx = sblock->sctx;
2123 struct btrfs_fs_info *fs_info = sctx->fs_info;
2124 u64 logical;
2125 struct btrfs_device *dev;
2126
2127 logical = sblock->pagev[0]->logical;
2128 dev = sblock->pagev[0]->dev;
2129
2130 if (sblock->no_io_error_seen)
2131 scrub_recheck_block_checksum(sblock);
2132
2133 if (!sblock->no_io_error_seen) {
2134 spin_lock(&sctx->stat_lock);
2135 sctx->stat.read_errors++;
2136 spin_unlock(&sctx->stat_lock);
2137 btrfs_err_rl_in_rcu(fs_info,
2138 "IO error rebuilding logical %llu for dev %s",
2139 logical, rcu_str_deref(dev->name));
2140 } else if (sblock->header_error || sblock->checksum_error) {
2141 spin_lock(&sctx->stat_lock);
2142 sctx->stat.uncorrectable_errors++;
2143 spin_unlock(&sctx->stat_lock);
2144 btrfs_err_rl_in_rcu(fs_info,
2145 "failed to rebuild valid logical %llu for dev %s",
2146 logical, rcu_str_deref(dev->name));
2147 } else {
2148 scrub_write_block_to_dev_replace(sblock);
2149 }
2150
2151 if (sctx->is_dev_replace && sctx->flush_all_writes) {
2152 mutex_lock(&sctx->wr_lock);
2153 scrub_wr_submit(sctx);
2154 mutex_unlock(&sctx->wr_lock);
2155 }
2156
2157 scrub_block_put(sblock);
2158 scrub_pending_bio_dec(sctx);
2159 }
2160
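/*
 * The source device of this block is missing, so it cannot be read
 * directly; rebuild the pages from the other stripes of the RAID56 full
 * stripe (device replace only) and handle the result in a worker.
 */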
2161 static void scrub_missing_raid56_pages(struct scrub_block *sblock)
2162 {
2163 struct scrub_ctx *sctx = sblock->sctx;
2164 struct btrfs_fs_info *fs_info = sctx->fs_info;
2165 u64 length = sblock->page_count * PAGE_SIZE;
2166 u64 logical = sblock->pagev[0]->logical;
2167 struct btrfs_bio *bbio = NULL;
2168 struct bio *bio;
2169 struct btrfs_raid_bio *rbio;
2170 int ret;
2171 int i;
2172
2173 btrfs_bio_counter_inc_blocked(fs_info);
2174 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
2175 &length, &bbio);
2176 if (ret || !bbio || !bbio->raid_map)
2177 goto bbio_out;
2178
2179 if (WARN_ON(!sctx->is_dev_replace ||
2180 !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
2181
2182
2183
2184
2185
2186
2187 goto bbio_out;
2188 }
2189
2190 bio = btrfs_io_bio_alloc(0);
2191 bio->bi_iter.bi_sector = logical >> 9;
2192 bio->bi_private = sblock;
2193 bio->bi_end_io = scrub_missing_raid56_end_io;
2194
2195 rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
2196 if (!rbio)
2197 goto rbio_out;
2198
2199 for (i = 0; i < sblock->page_count; i++) {
2200 struct scrub_page *spage = sblock->pagev[i];
2201
2202 raid56_add_scrub_pages(rbio, spage->page, spage->logical);
2203 }
2204
2205 btrfs_init_work(&sblock->work, scrub_missing_raid56_worker, NULL, NULL);
2206 scrub_block_get(sblock);
2207 scrub_pending_bio_inc(sctx);
2208 raid56_submit_missing_rbio(rbio);
2209 return;
2210
2211 rbio_out:
2212 bio_put(bio);
2213 bbio_out:
2214 btrfs_bio_counter_dec(fs_info);
2215 btrfs_put_bbio(bbio);
2216 spin_lock(&sctx->stat_lock);
2217 sctx->stat.malloc_errors++;
2218 spin_unlock(&sctx->stat_lock);
2219 }
2220
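/*
 * Split [logical, logical + len) into pages, attach them to a newly
 * allocated scrub_block and queue them for reading (or for RAID56 rebuild
 * if the device is missing). With 'force' set, the read bio is submitted
 * immediately instead of waiting until it fills up.
 */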
2221 static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
2222 u64 physical, struct btrfs_device *dev, u64 flags,
2223 u64 gen, int mirror_num, u8 *csum, int force,
2224 u64 physical_for_dev_replace)
2225 {
2226 struct scrub_block *sblock;
2227 int index;
2228
2229 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2230 if (!sblock) {
2231 spin_lock(&sctx->stat_lock);
2232 sctx->stat.malloc_errors++;
2233 spin_unlock(&sctx->stat_lock);
2234 return -ENOMEM;
2235 }
2236
2237 /* one ref inside this function, plus one for each page added to
2238  * a bio later on */
2239 refcount_set(&sblock->refs, 1);
2240 sblock->sctx = sctx;
2241 sblock->no_io_error_seen = 1;
2242
2243 for (index = 0; len > 0; index++) {
2244 struct scrub_page *spage;
2245 u64 l = min_t(u64, len, PAGE_SIZE);
2246
2247 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2248 if (!spage) {
2249 leave_nomem:
2250 spin_lock(&sctx->stat_lock);
2251 sctx->stat.malloc_errors++;
2252 spin_unlock(&sctx->stat_lock);
2253 scrub_block_put(sblock);
2254 return -ENOMEM;
2255 }
2256 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2257 scrub_page_get(spage);
2258 sblock->pagev[index] = spage;
2259 spage->sblock = sblock;
2260 spage->dev = dev;
2261 spage->flags = flags;
2262 spage->generation = gen;
2263 spage->logical = logical;
2264 spage->physical = physical;
2265 spage->physical_for_dev_replace = physical_for_dev_replace;
2266 spage->mirror_num = mirror_num;
2267 if (csum) {
2268 spage->have_csum = 1;
2269 memcpy(spage->csum, csum, sctx->csum_size);
2270 } else {
2271 spage->have_csum = 0;
2272 }
2273 sblock->page_count++;
2274 spage->page = alloc_page(GFP_KERNEL);
2275 if (!spage->page)
2276 goto leave_nomem;
2277 len -= l;
2278 logical += l;
2279 physical += l;
2280 physical_for_dev_replace += l;
2281 }
2282
2283 WARN_ON(sblock->page_count == 0);
2284 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
2285 /*
2286  * This case should only be hit for RAID 5/6 device replace. See
2287  * the comment in scrub_missing_raid56_pages() for details.
2288  */
2289 scrub_missing_raid56_pages(sblock);
2290 } else {
2291 for (index = 0; index < sblock->page_count; index++) {
2292 struct scrub_page *spage = sblock->pagev[index];
2293 int ret;
2294
2295 ret = scrub_add_page_to_rd_bio(sctx, spage);
2296 if (ret) {
2297 scrub_block_put(sblock);
2298 return ret;
2299 }
2300 }
2301
2302 if (force)
2303 scrub_submit(sctx);
2304 }
2305
2306 /* last one frees, either here or in bio completion for last page */
2307 scrub_block_put(sblock);
2308 return 0;
2309 }
2310
2311 static void scrub_bio_end_io(struct bio *bio)
2312 {
2313 struct scrub_bio *sbio = bio->bi_private;
2314 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
2315
2316 sbio->status = bio->bi_status;
2317 sbio->bio = bio;
2318
2319 btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
2320 }
2321
2322 static void scrub_bio_end_io_worker(struct btrfs_work *work)
2323 {
2324 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2325 struct scrub_ctx *sctx = sbio->sctx;
2326 int i;
2327
2328 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2329 if (sbio->status) {
2330 for (i = 0; i < sbio->page_count; i++) {
2331 struct scrub_page *spage = sbio->pagev[i];
2332
2333 spage->io_error = 1;
2334 spage->sblock->no_io_error_seen = 0;
2335 }
2336 }
2337
2338 /* now complete the scrub_block items that have all pages completed */
2339 for (i = 0; i < sbio->page_count; i++) {
2340 struct scrub_page *spage = sbio->pagev[i];
2341 struct scrub_block *sblock = spage->sblock;
2342
2343 if (atomic_dec_and_test(&sblock->outstanding_pages))
2344 scrub_block_complete(sblock);
2345 scrub_block_put(sblock);
2346 }
2347
2348 bio_put(sbio->bio);
2349 sbio->bio = NULL;
2350 spin_lock(&sctx->list_lock);
2351 sbio->next_free = sctx->first_free;
2352 sctx->first_free = sbio->index;
2353 spin_unlock(&sctx->list_lock);
2354
2355 if (sctx->is_dev_replace && sctx->flush_all_writes) {
2356 mutex_lock(&sctx->wr_lock);
2357 scrub_wr_submit(sctx);
2358 mutex_unlock(&sctx->wr_lock);
2359 }
2360
2361 scrub_pending_bio_dec(sctx);
2362 }
2363
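/*
 * Mark the sectors covered by [start, start + len) in the given per-stripe
 * bitmap, wrapping around if the range crosses the end of the stripe.
 */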
2364 static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2365 unsigned long *bitmap,
2366 u64 start, u64 len)
2367 {
2368 u64 offset;
2369 u64 nsectors64;
2370 u32 nsectors;
2371 int sectorsize = sparity->sctx->fs_info->sectorsize;
2372
2373 if (len >= sparity->stripe_len) {
2374 bitmap_set(bitmap, 0, sparity->nsectors);
2375 return;
2376 }
2377
2378 start -= sparity->logic_start;
2379 start = div64_u64_rem(start, sparity->stripe_len, &offset);
2380 offset = div_u64(offset, sectorsize);
2381 nsectors64 = div_u64(len, sectorsize);
2382
2383 ASSERT(nsectors64 < UINT_MAX);
2384 nsectors = (u32)nsectors64;
2385
2386 if (offset + nsectors <= sparity->nsectors) {
2387 bitmap_set(bitmap, offset, nsectors);
2388 return;
2389 }
2390
2391 bitmap_set(bitmap, offset, sparity->nsectors - offset);
2392 bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
2393 }
2394
2395 static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
2396 u64 start, u64 len)
2397 {
2398 __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
2399 }
2400
2401 static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
2402 u64 start, u64 len)
2403 {
2404 __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
2405 }
2406
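/*
 * Called once all pages of a block have completed I/O: either enter the
 * repair path or verify the checksums, and record the affected sectors in
 * the parity error bitmap when the block belongs to a RAID56 parity scrub.
 */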
2407 static void scrub_block_complete(struct scrub_block *sblock)
2408 {
2409 int corrupted = 0;
2410
2411 if (!sblock->no_io_error_seen) {
2412 corrupted = 1;
2413 scrub_handle_errored_block(sblock);
2414 } else {
2415 /*
2416  * If there is a checksum error, the block will be written to the
2417  * dev-replace target by the repair machinery invoked from
2418  * scrub_checksum(); otherwise, for dev-replace, write it out here
2419  * directly.
2420 corrupted = scrub_checksum(sblock);
2421 if (!corrupted && sblock->sctx->is_dev_replace)
2422 scrub_write_block_to_dev_replace(sblock);
2423 }
2424
2425 if (sblock->sparity && corrupted && !sblock->data_corrected) {
2426 u64 start = sblock->pagev[0]->logical;
2427 u64 end = sblock->pagev[sblock->page_count - 1]->logical +
2428 PAGE_SIZE;
2429
2430 scrub_parity_mark_sectors_error(sblock->sparity,
2431 start, end - start);
2432 }
2433 }
2434
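/*
 * Look up the data checksum for 'logical' in sctx->csum_list, discarding
 * entries that lie entirely before the current position. Returns 1 and
 * copies the checksum into 'csum' when found, 0 otherwise.
 */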
2435 static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
2436 {
2437 struct btrfs_ordered_sum *sum = NULL;
2438 unsigned long index;
2439 unsigned long num_sectors;
2440
2441 while (!list_empty(&sctx->csum_list)) {
2442 sum = list_first_entry(&sctx->csum_list,
2443 struct btrfs_ordered_sum, list);
2444 if (sum->bytenr > logical)
2445 return 0;
2446 if (sum->bytenr + sum->len > logical)
2447 break;
2448
2449 ++sctx->stat.csum_discards;
2450 list_del(&sum->list);
2451 kfree(sum);
2452 sum = NULL;
2453 }
2454 if (!sum)
2455 return 0;
2456
2457 index = div_u64(logical - sum->bytenr, sctx->fs_info->sectorsize);
2458 ASSERT(index < UINT_MAX);
2459
2460 num_sectors = sum->len / sctx->fs_info->sectorsize;
2461 memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size);
2462 if (index == num_sectors - 1) {
2463 list_del(&sum->list);
2464 kfree(sum);
2465 }
2466 return 1;
2467 }
2468
2469 /* split an extent into block-sized chunks and queue each via scrub_pages() */
2470 static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
2471 u64 logical, u64 len,
2472 u64 physical, struct btrfs_device *dev, u64 flags,
2473 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2474 {
2475 int ret;
2476 u8 csum[BTRFS_CSUM_SIZE];
2477 u32 blocksize;
2478
2479 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2480 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2481 blocksize = map->stripe_len;
2482 else
2483 blocksize = sctx->fs_info->sectorsize;
2484 spin_lock(&sctx->stat_lock);
2485 sctx->stat.data_extents_scrubbed++;
2486 sctx->stat.data_bytes_scrubbed += len;
2487 spin_unlock(&sctx->stat_lock);
2488 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2489 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2490 blocksize = map->stripe_len;
2491 else
2492 blocksize = sctx->fs_info->nodesize;
2493 spin_lock(&sctx->stat_lock);
2494 sctx->stat.tree_extents_scrubbed++;
2495 sctx->stat.tree_bytes_scrubbed += len;
2496 spin_unlock(&sctx->stat_lock);
2497 } else {
2498 blocksize = sctx->fs_info->sectorsize;
2499 WARN_ON(1);
2500 }
2501
2502 while (len) {
2503 u64 l = min_t(u64, len, blocksize);
2504 int have_csum = 0;
2505
2506 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2507 /* push csums to sbio */
2508 have_csum = scrub_find_csum(sctx, logical, csum);
2509 if (have_csum == 0)
2510 ++sctx->stat.no_csum;
2511 }
2512 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2513 mirror_num, have_csum ? csum : NULL, 0,
2514 physical_for_dev_replace);
2515 if (ret)
2516 return ret;
2517 len -= l;
2518 logical += l;
2519 physical += l;
2520 physical_for_dev_replace += l;
2521 }
2522 return 0;
2523 }
2524
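/*
 * Like scrub_pages(), but for blocks scrubbed as part of a RAID56 full
 * stripe: each page is additionally linked into the scrub_parity's page
 * list and the block holds a reference on the scrub_parity.
 */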
2525 static int scrub_pages_for_parity(struct scrub_parity *sparity,
2526 u64 logical, u64 len,
2527 u64 physical, struct btrfs_device *dev,
2528 u64 flags, u64 gen, int mirror_num, u8 *csum)
2529 {
2530 struct scrub_ctx *sctx = sparity->sctx;
2531 struct scrub_block *sblock;
2532 int index;
2533
2534 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2535 if (!sblock) {
2536 spin_lock(&sctx->stat_lock);
2537 sctx->stat.malloc_errors++;
2538 spin_unlock(&sctx->stat_lock);
2539 return -ENOMEM;
2540 }
2541
2542 /* one ref inside this function, plus one for each page added to
2543  * a bio later on */
2544 refcount_set(&sblock->refs, 1);
2545 sblock->sctx = sctx;
2546 sblock->no_io_error_seen = 1;
2547 sblock->sparity = sparity;
2548 scrub_parity_get(sparity);
2549
2550 for (index = 0; len > 0; index++) {
2551 struct scrub_page *spage;
2552 u64 l = min_t(u64, len, PAGE_SIZE);
2553
2554 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2555 if (!spage) {
2556 leave_nomem:
2557 spin_lock(&sctx->stat_lock);
2558 sctx->stat.malloc_errors++;
2559 spin_unlock(&sctx->stat_lock);
2560 scrub_block_put(sblock);
2561 return -ENOMEM;
2562 }
2563 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2564 /* For scrub block */
2565 scrub_page_get(spage);
2566 sblock->pagev[index] = spage;
2567 /* For scrub parity */
2568 scrub_page_get(spage);
2569 list_add_tail(&spage->list, &sparity->spages);
2570 spage->sblock = sblock;
2571 spage->dev = dev;
2572 spage->flags = flags;
2573 spage->generation = gen;
2574 spage->logical = logical;
2575 spage->physical = physical;
2576 spage->mirror_num = mirror_num;
2577 if (csum) {
2578 spage->have_csum = 1;
2579 memcpy(spage->csum, csum, sctx->csum_size);
2580 } else {
2581 spage->have_csum = 0;
2582 }
2583 sblock->page_count++;
2584 spage->page = alloc_page(GFP_KERNEL);
2585 if (!spage->page)
2586 goto leave_nomem;
2587 len -= l;
2588 logical += l;
2589 physical += l;
2590 }
2591
2592 WARN_ON(sblock->page_count == 0);
2593 for (index = 0; index < sblock->page_count; index++) {
2594 struct scrub_page *spage = sblock->pagev[index];
2595 int ret;
2596
2597 ret = scrub_add_page_to_rd_bio(sctx, spage);
2598 if (ret) {
2599 scrub_block_put(sblock);
2600 return ret;
2601 }
2602 }
2603
2604 /* last one frees, either here or in bio completion for last page */
2605 scrub_block_put(sblock);
2606 return 0;
2607 }
2608
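/*
 * Scrub an extent belonging to a RAID56 full stripe. Extents on missing
 * devices are only marked in the error bitmap, and data sectors without a
 * checksum are skipped.
 */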
2609 static int scrub_extent_for_parity(struct scrub_parity *sparity,
2610 u64 logical, u64 len,
2611 u64 physical, struct btrfs_device *dev,
2612 u64 flags, u64 gen, int mirror_num)
2613 {
2614 struct scrub_ctx *sctx = sparity->sctx;
2615 int ret;
2616 u8 csum[BTRFS_CSUM_SIZE];
2617 u32 blocksize;
2618
2619 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
2620 scrub_parity_mark_sectors_error(sparity, logical, len);
2621 return 0;
2622 }
2623
2624 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2625 blocksize = sparity->stripe_len;
2626 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2627 blocksize = sparity->stripe_len;
2628 } else {
2629 blocksize = sctx->fs_info->sectorsize;
2630 WARN_ON(1);
2631 }
2632
2633 while (len) {
2634 u64 l = min_t(u64, len, blocksize);
2635 int have_csum = 0;
2636
2637 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2638 /* push csums to sbio */
2639 have_csum = scrub_find_csum(sctx, logical, csum);
2640 if (have_csum == 0)
2641 goto skip;
2642 }
2643 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
2644 flags, gen, mirror_num,
2645 have_csum ? csum : NULL);
2646 if (ret)
2647 return ret;
2648 skip:
2649 len -= l;
2650 logical += l;
2651 physical += l;
2652 }
2653 return 0;
2654 }
2655
2656 /*
2657  * Given a physical address, this will calculate its
2658  * logical offset. If this is a parity stripe, it will return
2659  * the leftmost data stripe's logical offset.
2660  *
2661  * Returns 0 if it is a data stripe, 1 if it is a parity stripe.
2662  */
2663 static int get_raid56_logic_offset(u64 physical, int num,
2664 struct map_lookup *map, u64 *offset,
2665 u64 *stripe_start)
2666 {
2667 int i;
2668 int j = 0;
2669 u64 stripe_nr;
2670 u64 last_offset;
2671 u32 stripe_index;
2672 u32 rot;
2673 const int data_stripes = nr_data_stripes(map);
2674
2675 last_offset = (physical - map->stripes[num].physical) * data_stripes;
2676 if (stripe_start)
2677 *stripe_start = last_offset;
2678
2679 *offset = last_offset;
2680 for (i = 0; i < data_stripes; i++) {
2681 *offset = last_offset + i * map->stripe_len;
2682
2683 stripe_nr = div64_u64(*offset, map->stripe_len);
2684 stripe_nr = div_u64(stripe_nr, data_stripes);
2685
2686 /* Work out the disk rotation on this stripe-set */
2687 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
2688 /* calculate which device stripe this data sector lives on */
2689 rot += i;
2690 stripe_index = rot % map->num_stripes;
2691 if (stripe_index == num)
2692 return 0;
2693 if (stripe_index < num)
2694 j++;
2695 }
2696 *offset = last_offset + j * map->stripe_len;
2697 return 1;
2698 }
2699
2700 static void scrub_free_parity(struct scrub_parity *sparity)
2701 {
2702 struct scrub_ctx *sctx = sparity->sctx;
2703 struct scrub_page *curr, *next;
2704 int nbits;
2705
2706 nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
2707 if (nbits) {
2708 spin_lock(&sctx->stat_lock);
2709 sctx->stat.read_errors += nbits;
2710 sctx->stat.uncorrectable_errors += nbits;
2711 spin_unlock(&sctx->stat_lock);
2712 }
2713
2714 list_for_each_entry_safe(curr, next, &sparity->spages, list) {
2715 list_del_init(&curr->list);
2716 scrub_page_put(curr);
2717 }
2718
2719 kfree(sparity);
2720 }
2721
2722 static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
2723 {
2724 struct scrub_parity *sparity = container_of(work, struct scrub_parity,
2725 work);
2726 struct scrub_ctx *sctx = sparity->sctx;
2727
2728 scrub_free_parity(sparity);
2729 scrub_pending_bio_dec(sctx);
2730 }
2731
2732 static void scrub_parity_bio_endio(struct bio *bio)
2733 {
2734 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
2735 struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
2736
2737 if (bio->bi_status)
2738 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2739 sparity->nsectors);
2740
2741 bio_put(bio);
2742
2743 btrfs_init_work(&sparity->work, scrub_parity_bio_endio_worker, NULL,
2744 NULL);
2745 btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
2746 }
2747
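/*
 * Once all data blocks of the full stripe have been scrubbed, hand the
 * remaining good sectors (dbitmap minus ebitmap) to the RAID56 code so it
 * can verify and, if needed, rewrite the parity.
 */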
2748 static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2749 {
2750 struct scrub_ctx *sctx = sparity->sctx;
2751 struct btrfs_fs_info *fs_info = sctx->fs_info;
2752 struct bio *bio;
2753 struct btrfs_raid_bio *rbio;
2754 struct btrfs_bio *bbio = NULL;
2755 u64 length;
2756 int ret;
2757
2758 if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
2759 sparity->nsectors))
2760 goto out;
2761
2762 length = sparity->logic_end - sparity->logic_start;
2763
2764 btrfs_bio_counter_inc_blocked(fs_info);
2765 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
2766 &length, &bbio);
2767 if (ret || !bbio || !bbio->raid_map)
2768 goto bbio_out;
2769
2770 bio = btrfs_io_bio_alloc(0);
2771 bio->bi_iter.bi_sector = sparity->logic_start >> 9;
2772 bio->bi_private = sparity;
2773 bio->bi_end_io = scrub_parity_bio_endio;
2774
2775 rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
2776 length, sparity->scrub_dev,
2777 sparity->dbitmap,
2778 sparity->nsectors);
2779 if (!rbio)
2780 goto rbio_out;
2781
2782 scrub_pending_bio_inc(sctx);
2783 raid56_parity_submit_scrub_rbio(rbio);
2784 return;
2785
2786 rbio_out:
2787 bio_put(bio);
2788 bbio_out:
2789 btrfs_bio_counter_dec(fs_info);
2790 btrfs_put_bbio(bbio);
2791 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2792 sparity->nsectors);
2793 spin_lock(&sctx->stat_lock);
2794 sctx->stat.malloc_errors++;
2795 spin_unlock(&sctx->stat_lock);
2796 out:
2797 scrub_free_parity(sparity);
2798 }
2799
2800 static inline int scrub_calc_parity_bitmap_len(int nsectors)
2801 {
2802 return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
2803 }
2804
2805 static void scrub_parity_get(struct scrub_parity *sparity)
2806 {
2807 refcount_inc(&sparity->refs);
2808 }
2809
2810 static void scrub_parity_put(struct scrub_parity *sparity)
2811 {
2812 if (!refcount_dec_and_test(&sparity->refs))
2813 return;
2814
2815 scrub_parity_check_and_repair(sparity);
2816 }
2817
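/*
 * Scrub the parity of one RAID56 full stripe: walk the extent tree for the
 * data extents inside [logic_start, logic_end), read and verify them, then
 * check and repair the parity via scrub_parity_put() ->
 * scrub_parity_check_and_repair().
 */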
2818 static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
2819 struct map_lookup *map,
2820 struct btrfs_device *sdev,
2821 struct btrfs_path *path,
2822 u64 logic_start,
2823 u64 logic_end)
2824 {
2825 struct btrfs_fs_info *fs_info = sctx->fs_info;
2826 struct btrfs_root *root = fs_info->extent_root;
2827 struct btrfs_root *csum_root = fs_info->csum_root;
2828 struct btrfs_extent_item *extent;
2829 struct btrfs_bio *bbio = NULL;
2830 u64 flags;
2831 int ret;
2832 int slot;
2833 struct extent_buffer *l;
2834 struct btrfs_key key;
2835 u64 generation;
2836 u64 extent_logical;
2837 u64 extent_physical;
2838 u64 extent_len;
2839 u64 mapped_length;
2840 struct btrfs_device *extent_dev;
2841 struct scrub_parity *sparity;
2842 int nsectors;
2843 int bitmap_len;
2844 int extent_mirror_num;
2845 int stop_loop = 0;
2846
2847 nsectors = div_u64(map->stripe_len, fs_info->sectorsize);
2848 bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
2849 sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
2850 GFP_NOFS);
2851 if (!sparity) {
2852 spin_lock(&sctx->stat_lock);
2853 sctx->stat.malloc_errors++;
2854 spin_unlock(&sctx->stat_lock);
2855 return -ENOMEM;
2856 }
2857
2858 sparity->stripe_len = map->stripe_len;
2859 sparity->nsectors = nsectors;
2860 sparity->sctx = sctx;
2861 sparity->scrub_dev = sdev;
2862 sparity->logic_start = logic_start;
2863 sparity->logic_end = logic_end;
2864 refcount_set(&sparity->refs, 1);
2865 INIT_LIST_HEAD(&sparity->spages);
2866 sparity->dbitmap = sparity->bitmap;
2867 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
2868
2869 ret = 0;
2870 while (logic_start < logic_end) {
2871 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2872 key.type = BTRFS_METADATA_ITEM_KEY;
2873 else
2874 key.type = BTRFS_EXTENT_ITEM_KEY;
2875 key.objectid = logic_start;
2876 key.offset = (u64)-1;
2877
2878 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2879 if (ret < 0)
2880 goto out;
2881
2882 if (ret > 0) {
2883 ret = btrfs_previous_extent_item(root, path, 0);
2884 if (ret < 0)
2885 goto out;
2886 if (ret > 0) {
2887 btrfs_release_path(path);
2888 ret = btrfs_search_slot(NULL, root, &key,
2889 path, 0, 0);
2890 if (ret < 0)
2891 goto out;
2892 }
2893 }
2894
2895 stop_loop = 0;
2896 while (1) {
2897 u64 bytes;
2898
2899 l = path->nodes[0];
2900 slot = path->slots[0];
2901 if (slot >= btrfs_header_nritems(l)) {
2902 ret = btrfs_next_leaf(root, path);
2903 if (ret == 0)
2904 continue;
2905 if (ret < 0)
2906 goto out;
2907
2908 stop_loop = 1;
2909 break;
2910 }
2911 btrfs_item_key_to_cpu(l, &key, slot);
2912
2913 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2914 key.type != BTRFS_METADATA_ITEM_KEY)
2915 goto next;
2916
2917 if (key.type == BTRFS_METADATA_ITEM_KEY)
2918 bytes = fs_info->nodesize;
2919 else
2920 bytes = key.offset;
2921
2922 if (key.objectid + bytes <= logic_start)
2923 goto next;
2924
2925 if (key.objectid >= logic_end) {
2926 stop_loop = 1;
2927 break;
2928 }
2929
2930 while (key.objectid >= logic_start + map->stripe_len)
2931 logic_start += map->stripe_len;
2932
2933 extent = btrfs_item_ptr(l, slot,
2934 struct btrfs_extent_item);
2935 flags = btrfs_extent_flags(l, extent);
2936 generation = btrfs_extent_generation(l, extent);
2937
2938 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
2939 (key.objectid < logic_start ||
2940 key.objectid + bytes >
2941 logic_start + map->stripe_len)) {
2942 btrfs_err(fs_info,
2943 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
2944 key.objectid, logic_start);
2945 spin_lock(&sctx->stat_lock);
2946 sctx->stat.uncorrectable_errors++;
2947 spin_unlock(&sctx->stat_lock);
2948 goto next;
2949 }
2950 again:
2951 extent_logical = key.objectid;
2952 extent_len = bytes;
2953
2954 if (extent_logical < logic_start) {
2955 extent_len -= logic_start - extent_logical;
2956 extent_logical = logic_start;
2957 }
2958
2959 if (extent_logical + extent_len >
2960 logic_start + map->stripe_len)
2961 extent_len = logic_start + map->stripe_len -
2962 extent_logical;
2963
2964 scrub_parity_mark_sectors_data(sparity, extent_logical,
2965 extent_len);
2966
2967 mapped_length = extent_len;
2968 bbio = NULL;
2969 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
2970 extent_logical, &mapped_length, &bbio,
2971 0);
2972 if (!ret) {
2973 if (!bbio || mapped_length < extent_len)
2974 ret = -EIO;
2975 }
2976 if (ret) {
2977 btrfs_put_bbio(bbio);
2978 goto out;
2979 }
2980 extent_physical = bbio->stripes[0].physical;
2981 extent_mirror_num = bbio->mirror_num;
2982 extent_dev = bbio->stripes[0].dev;
2983 btrfs_put_bbio(bbio);
2984
2985 ret = btrfs_lookup_csums_range(csum_root,
2986 extent_logical,
2987 extent_logical + extent_len - 1,
2988 &sctx->csum_list, 1);
2989 if (ret)
2990 goto out;
2991
2992 ret = scrub_extent_for_parity(sparity, extent_logical,
2993 extent_len,
2994 extent_physical,
2995 extent_dev, flags,
2996 generation,
2997 extent_mirror_num);
2998
2999 scrub_free_csums(sctx);
3000
3001 if (ret)
3002 goto out;
3003
3004 if (extent_logical + extent_len <
3005 key.objectid + bytes) {
3006 logic_start += map->stripe_len;
3007
3008 if (logic_start >= logic_end) {
3009 stop_loop = 1;
3010 break;
3011 }
3012
3013 if (logic_start < key.objectid + bytes) {
3014 cond_resched();
3015 goto again;
3016 }
3017 }
3018 next:
3019 path->slots[0]++;
3020 }
3021
3022 btrfs_release_path(path);
3023
3024 if (stop_loop)
3025 break;
3026
3027 logic_start += map->stripe_len;
3028 }
3029 out:
3030 if (ret < 0)
3031 scrub_parity_mark_sectors_error(sparity, logic_start,
3032 logic_end - logic_start);
3033 scrub_parity_put(sparity);
3034 scrub_submit(sctx);
3035 mutex_lock(&sctx->wr_lock);
3036 scrub_wr_submit(sctx);
3037 mutex_unlock(&sctx->wr_lock);
3038
3039 btrfs_release_path(path);
3040 return ret < 0 ? ret : 0;
3041 }
3042
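/*
 * Scrub one device stripe of a chunk: walk the extent tree (from the commit
 * root) for extents falling into each stripe, read and verify them, and
 * hand RAID56 parity stripes over to scrub_raid56_parity().
 */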
3043 static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3044 struct map_lookup *map,
3045 struct btrfs_device *scrub_dev,
3046 int num, u64 base, u64 length)
3047 {
3048 struct btrfs_path *path, *ppath;
3049 struct btrfs_fs_info *fs_info = sctx->fs_info;
3050 struct btrfs_root *root = fs_info->extent_root;
3051 struct btrfs_root *csum_root = fs_info->csum_root;
3052 struct btrfs_extent_item *extent;
3053 struct blk_plug plug;
3054 u64 flags;
3055 int ret;
3056 int slot;
3057 u64 nstripes;
3058 struct extent_buffer *l;
3059 u64 physical;
3060 u64 logical;
3061 u64 logic_end;
3062 u64 physical_end;
3063 u64 generation;
3064 int mirror_num;
3065 struct reada_control *reada1;
3066 struct reada_control *reada2;
3067 struct btrfs_key key;
3068 struct btrfs_key key_end;
3069 u64 increment = map->stripe_len;
3070 u64 offset;
3071 u64 extent_logical;
3072 u64 extent_physical;
3073 u64 extent_len;
3074 u64 stripe_logical;
3075 u64 stripe_end;
3076 struct btrfs_device *extent_dev;
3077 int extent_mirror_num;
3078 int stop_loop = 0;
3079
3080 physical = map->stripes[num].physical;
3081 offset = 0;
3082 nstripes = div64_u64(length, map->stripe_len);
3083 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3084 offset = map->stripe_len * num;
3085 increment = map->stripe_len * map->num_stripes;
3086 mirror_num = 1;
3087 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3088 int factor = map->num_stripes / map->sub_stripes;
3089 offset = map->stripe_len * (num / map->sub_stripes);
3090 increment = map->stripe_len * factor;
3091 mirror_num = num % map->sub_stripes + 1;
3092 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
3093 increment = map->stripe_len;
3094 mirror_num = num % map->num_stripes + 1;
3095 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3096 increment = map->stripe_len;
3097 mirror_num = num % map->num_stripes + 1;
3098 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3099 get_raid56_logic_offset(physical, num, map, &offset, NULL);
3100 increment = map->stripe_len * nr_data_stripes(map);
3101 mirror_num = 1;
3102 } else {
3103 increment = map->stripe_len;
3104 mirror_num = 1;
3105 }
3106
3107 path = btrfs_alloc_path();
3108 if (!path)
3109 return -ENOMEM;
3110
3111 ppath = btrfs_alloc_path();
3112 if (!ppath) {
3113 btrfs_free_path(path);
3114 return -ENOMEM;
3115 }
3116
3117 /*
3118  * Work on commit root. The related disk blocks are static as
3119  * long as COW is applied. This means it is safe to rewrite
3120  * them to repair disk errors without any race conditions.
3121  */
3122 path->search_commit_root = 1;
3123 path->skip_locking = 1;
3124
3125 ppath->search_commit_root = 1;
3126 ppath->skip_locking = 1;
3127
3128 /*
3129  * Trigger readahead for the extent and csum trees and wait for it; scrub
3130  * is paused during readahead so transaction commits are not held off.
3131  */
3132 logical = base + offset;
3133 physical_end = physical + nstripes * map->stripe_len;
3134 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3135 get_raid56_logic_offset(physical_end, num,
3136 map, &logic_end, NULL);
3137 logic_end += base;
3138 } else {
3139 logic_end = logical + increment * nstripes;
3140 }
3141 wait_event(sctx->list_wait,
3142 atomic_read(&sctx->bios_in_flight) == 0);
3143 scrub_blocked_if_needed(fs_info);
3144
3145 /* set up readahead over the extent tree range covered by this stripe */
3146 key.objectid = logical;
3147 key.type = BTRFS_EXTENT_ITEM_KEY;
3148 key.offset = (u64)0;
3149 key_end.objectid = logic_end;
3150 key_end.type = BTRFS_METADATA_ITEM_KEY;
3151 key_end.offset = (u64)-1;
3152 reada1 = btrfs_reada_add(root, &key, &key_end);
3153
3154 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3155 key.type = BTRFS_EXTENT_CSUM_KEY;
3156 key.offset = logical;
3157 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3158 key_end.type = BTRFS_EXTENT_CSUM_KEY;
3159 key_end.offset = logic_end;
3160 reada2 = btrfs_reada_add(csum_root, &key, &key_end);
3161
3162 if (!IS_ERR(reada1))
3163 btrfs_reada_wait(reada1);
3164 if (!IS_ERR(reada2))
3165 btrfs_reada_wait(reada2);
3166
3167
3168 /*
3169  * Collect all data csums for the stripe to avoid seeking during
3170  * the scrub. This might currently (crc32) end up to be about 1MB.
3171  */
3172 blk_start_plug(&plug);
3173
3174 /*
3175  * Now find all extents for each stripe and scrub them.
3176  */
3177 ret = 0;
3178 while (physical < physical_end) {
3179 /*
3180  * Canceled?
3181  */
3182 if (atomic_read(&fs_info->scrub_cancel_req) ||
3183 atomic_read(&sctx->cancel_req)) {
3184 ret = -ECANCELED;
3185 goto out;
3186 }
3187
3188
3189 /* check to see if we have to pause */
3190 if (atomic_read(&fs_info->scrub_pause_req)) {
3191 /* push queued extents */
3192 sctx->flush_all_writes = true;
3193 scrub_submit(sctx);
3194 mutex_lock(&sctx->wr_lock);
3195 scrub_wr_submit(sctx);
3196 mutex_unlock(&sctx->wr_lock);
3197 wait_event(sctx->list_wait,
3198 atomic_read(&sctx->bios_in_flight) == 0);
3199 sctx->flush_all_writes = false;
3200 scrub_blocked_if_needed(fs_info);
3201 }
3202
3203 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3204 ret = get_raid56_logic_offset(physical, num, map,
3205 &logical,
3206 &stripe_logical);
3207 logical += base;
3208 if (ret) {
3209 /* it is a parity stripe */
3210 stripe_logical += base;
3211 stripe_end = stripe_logical + increment;
3212 ret = scrub_raid56_parity(sctx, map, scrub_dev,
3213 ppath, stripe_logical,
3214 stripe_end);
3215 if (ret)
3216 goto out;
3217 goto skip;
3218 }
3219 }
3220
3221 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
3222 key.type = BTRFS_METADATA_ITEM_KEY;
3223 else
3224 key.type = BTRFS_EXTENT_ITEM_KEY;
3225 key.objectid = logical;
3226 key.offset = (u64)-1;
3227
3228 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3229 if (ret < 0)
3230 goto out;
3231
3232 if (ret > 0) {
3233 ret = btrfs_previous_extent_item(root, path, 0);
3234 if (ret < 0)
3235 goto out;
3236 if (ret > 0) {
3237 /* there's no smaller item, so stick with the
3238  * larger one */
3239 btrfs_release_path(path);
3240 ret = btrfs_search_slot(NULL, root, &key,
3241 path, 0, 0);
3242 if (ret < 0)
3243 goto out;
3244 }
3245 }
3246
3247 stop_loop = 0;
3248 while (1) {
3249 u64 bytes;
3250
3251 l = path->nodes[0];
3252 slot = path->slots[0];
3253 if (slot >= btrfs_header_nritems(l)) {
3254 ret = btrfs_next_leaf(root, path);
3255 if (ret == 0)
3256 continue;
3257 if (ret < 0)
3258 goto out;
3259
3260 stop_loop = 1;
3261 break;
3262 }
3263 btrfs_item_key_to_cpu(l, &key, slot);
3264
3265 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3266 key.type != BTRFS_METADATA_ITEM_KEY)
3267 goto next;
3268
3269 if (key.type == BTRFS_METADATA_ITEM_KEY)
3270 bytes = fs_info->nodesize;
3271 else
3272 bytes = key.offset;
3273
3274 if (key.objectid + bytes <= logical)
3275 goto next;
3276
3277 if (key.objectid >= logical + map->stripe_len) {
3278 /* out of this device extent */
3279 if (key.objectid >= logic_end)
3280 stop_loop = 1;
3281 break;
3282 }
3283
3284 extent = btrfs_item_ptr(l, slot,
3285 struct btrfs_extent_item);
3286 flags = btrfs_extent_flags(l, extent);
3287 generation = btrfs_extent_generation(l, extent);
3288
3289 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
3290 (key.objectid < logical ||
3291 key.objectid + bytes >
3292 logical + map->stripe_len)) {
3293 btrfs_err(fs_info,
3294 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
3295 key.objectid, logical);
3296 spin_lock(&sctx->stat_lock);
3297 sctx->stat.uncorrectable_errors++;
3298 spin_unlock(&sctx->stat_lock);
3299 goto next;
3300 }
3301
3302 again:
3303 extent_logical = key.objectid;
3304 extent_len = bytes;
3305
3306 /*
3307  * trim extent to this stripe
3308  */
3309 if (extent_logical < logical) {
3310 extent_len -= logical - extent_logical;
3311 extent_logical = logical;
3312 }
3313 if (extent_logical + extent_len >
3314 logical + map->stripe_len) {
3315 extent_len = logical + map->stripe_len -
3316 extent_logical;
3317 }
3318
3319 extent_physical = extent_logical - logical + physical;
3320 extent_dev = scrub_dev;
3321 extent_mirror_num = mirror_num;
3322 if (sctx->is_dev_replace)
3323 scrub_remap_extent(fs_info, extent_logical,
3324 extent_len, &extent_physical,
3325 &extent_dev,
3326 &extent_mirror_num);
3327
3328 ret = btrfs_lookup_csums_range(csum_root,
3329 extent_logical,
3330 extent_logical +
3331 extent_len - 1,
3332 &sctx->csum_list, 1);
3333 if (ret)
3334 goto out;
3335
3336 ret = scrub_extent(sctx, map, extent_logical, extent_len,
3337 extent_physical, extent_dev, flags,
3338 generation, extent_mirror_num,
3339 extent_logical - logical + physical);
3340
3341 scrub_free_csums(sctx);
3342
3343 if (ret)
3344 goto out;
3345
3346 if (extent_logical + extent_len <
3347 key.objectid + bytes) {
3348 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3349 /*
3350  * loop until we find the next data stripe
3351  * or we have finished all stripes.
3352  */
3353 loop:
3354 physical += map->stripe_len;
3355 ret = get_raid56_logic_offset(physical,
3356 num, map, &logical,
3357 &stripe_logical);
3358 logical += base;
3359
3360 if (ret && physical < physical_end) {
3361 stripe_logical += base;
3362 stripe_end = stripe_logical +
3363 increment;
3364 ret = scrub_raid56_parity(sctx,
3365 map, scrub_dev, ppath,
3366 stripe_logical,
3367 stripe_end);
3368 if (ret)
3369 goto out;
3370 goto loop;
3371 }
3372 } else {
3373 physical += map->stripe_len;
3374 logical += increment;
3375 }
3376 if (logical < key.objectid + bytes) {
3377 cond_resched();
3378 goto again;
3379 }
3380
3381 if (physical >= physical_end) {
3382 stop_loop = 1;
3383 break;
3384 }
3385 }
3386 next:
3387 path->slots[0]++;
3388 }
3389 btrfs_release_path(path);
3390 skip:
3391 logical += increment;
3392 physical += map->stripe_len;
3393 spin_lock(&sctx->stat_lock);
3394 if (stop_loop)
3395 sctx->stat.last_physical = map->stripes[num].physical +
3396 length;
3397 else
3398 sctx->stat.last_physical = physical;
3399 spin_unlock(&sctx->stat_lock);
3400 if (stop_loop)
3401 break;
3402 }
3403 out:
3404 /* push queued extents */
3405 scrub_submit(sctx);
3406 mutex_lock(&sctx->wr_lock);
3407 scrub_wr_submit(sctx);
3408 mutex_unlock(&sctx->wr_lock);
3409
3410 blk_finish_plug(&plug);
3411 btrfs_free_path(path);
3412 btrfs_free_path(ppath);
3413 return ret < 0 ? ret : 0;
3414 }
3415
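/*
 * Scrub the part of a chunk that lives on scrub_dev at dev_offset by
 * looking up its extent map and scrubbing every matching stripe.
 */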
3416 static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
3417 struct btrfs_device *scrub_dev,
3418 u64 chunk_offset, u64 length,
3419 u64 dev_offset,
3420 struct btrfs_block_group_cache *cache)
3421 {
3422 struct btrfs_fs_info *fs_info = sctx->fs_info;
3423 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
3424 struct map_lookup *map;
3425 struct extent_map *em;
3426 int i;
3427 int ret = 0;
3428
3429 read_lock(&map_tree->lock);
3430 em = lookup_extent_mapping(map_tree, chunk_offset, 1);
3431 read_unlock(&map_tree->lock);
3432
3433 if (!em) {
3434 /*
3435  * Might have been an unused block group deleted by the cleaner
3436  * kthread or relocation.
3437  */
3438 spin_lock(&cache->lock);
3439 if (!cache->removed)
3440 ret = -EINVAL;
3441 spin_unlock(&cache->lock);
3442
3443 return ret;
3444 }
3445
3446 map = em->map_lookup;
3447 if (em->start != chunk_offset)
3448 goto out;
3449
3450 if (em->len < length)
3451 goto out;
3452
3453 for (i = 0; i < map->num_stripes; ++i) {
3454 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
3455 map->stripes[i].physical == dev_offset) {
3456 ret = scrub_stripe(sctx, map, scrub_dev, i,
3457 chunk_offset, length);
3458 if (ret)
3459 goto out;
3460 }
3461 }
3462 out:
3463 free_extent_map(em);
3464
3465 return ret;
3466 }
3467
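/*
 * Walk the device extents of scrub_dev within [start, end), try to set each
 * block group read-only while it is being scrubbed, and scrub the
 * corresponding chunks one by one. For dev-replace, the cursor in
 * fs_info->dev_replace is advanced as chunks are finished.
 */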
3468 static noinline_for_stack
3469 int scrub_enumerate_chunks(struct scrub_ctx *sctx,
3470 struct btrfs_device *scrub_dev, u64 start, u64 end)
3471 {
3472 struct btrfs_dev_extent *dev_extent = NULL;
3473 struct btrfs_path *path;
3474 struct btrfs_fs_info *fs_info = sctx->fs_info;
3475 struct btrfs_root *root = fs_info->dev_root;
3476 u64 length;
3477 u64 chunk_offset;
3478 int ret = 0;
3479 int ro_set;
3480 int slot;
3481 struct extent_buffer *l;
3482 struct btrfs_key key;
3483 struct btrfs_key found_key;
3484 struct btrfs_block_group_cache *cache;
3485 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
3486
3487 path = btrfs_alloc_path();
3488 if (!path)
3489 return -ENOMEM;
3490
3491 path->reada = READA_FORWARD;
3492 path->search_commit_root = 1;
3493 path->skip_locking = 1;
3494
3495 key.objectid = scrub_dev->devid;
3496 key.offset = 0ull;
3497 key.type = BTRFS_DEV_EXTENT_KEY;
3498
3499 while (1) {
3500 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3501 if (ret < 0)
3502 break;
3503 if (ret > 0) {
3504 if (path->slots[0] >=
3505 btrfs_header_nritems(path->nodes[0])) {
3506 ret = btrfs_next_leaf(root, path);
3507 if (ret < 0)
3508 break;
3509 if (ret > 0) {
3510 ret = 0;
3511 break;
3512 }
3513 } else {
3514 ret = 0;
3515 }
3516 }
3517
3518 l = path->nodes[0];
3519 slot = path->slots[0];
3520
3521 btrfs_item_key_to_cpu(l, &found_key, slot);
3522
3523 if (found_key.objectid != scrub_dev->devid)
3524 break;
3525
3526 if (found_key.type != BTRFS_DEV_EXTENT_KEY)
3527 break;
3528
3529 if (found_key.offset >= end)
3530 break;
3531
3532 if (found_key.offset < key.offset)
3533 break;
3534
3535 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
3536 length = btrfs_dev_extent_length(l, dev_extent);
3537
3538 if (found_key.offset + length <= start)
3539 goto skip;
3540
3541 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
3542
3543 /*
3544  * Get a reference on the corresponding block group to prevent
3545  * the chunk from going away while we scrub it.
3546  */
3547 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3548
3549 /* some chunks are removed but not yet committed to disk,
3550  * continue scrubbing in that case */
3551 if (!cache)
3552 goto skip;
3553
3554 /*
3555  * We need to call btrfs_inc_block_group_ro() with scrubs paused,
3556  * to avoid a deadlock caused by:
3557  * btrfs_inc_block_group_ro()
3558  * -> btrfs_wait_for_commit()
3559  * -> btrfs_commit_transaction()
3560  * -> btrfs_scrub_pause()
3561  */
3562 scrub_pause_on(fs_info);
3563 ret = btrfs_inc_block_group_ro(cache);
3564 if (!ret && sctx->is_dev_replace) {
3565 /*
3566  * If we are doing a device replace, wait for any tasks that
3567  * started delalloc right before we set the block group to RO
3568  * mode, as they might have just allocated an extent from it or
3569  * decided they could do a nocow write. And if any such tasks
3570  * did that, wait for their ordered extents to complete and
3571  * then commit the current transaction, so that we can later
3572  * see the new extent items in the extent tree - the ordered
3573  * extents create delayed data references (for cow writes) when
3574  * they complete, which will be run and insert the
3575  * corresponding extent items into the extent tree when we
3576  * commit the transaction they used when running
3577  * inode.c:btrfs_finish_ordered_io(). We later use the commit
3578  * root of the extent tree to find extents to copy from the
3579  * source device into the target device, and we don't want to
3580  * miss any new extents.
3581  */
3582
3583 btrfs_wait_block_group_reservations(cache);
3584 btrfs_wait_nocow_writers(cache);
3585 ret = btrfs_wait_ordered_roots(fs_info, U64_MAX,
3586 cache->key.objectid,
3587 cache->key.offset);
3588 if (ret > 0) {
3589 struct btrfs_trans_handle *trans;
3590
3591 trans = btrfs_join_transaction(root);
3592 if (IS_ERR(trans))
3593 ret = PTR_ERR(trans);
3594 else
3595 ret = btrfs_commit_transaction(trans);
3596 if (ret) {
3597 scrub_pause_off(fs_info);
3598 btrfs_put_block_group(cache);
3599 break;
3600 }
3601 }
3602 }
3603 scrub_pause_off(fs_info);
3604
3605 if (ret == 0) {
3606 ro_set = 1;
3607 } else if (ret == -ENOSPC) {
3608 /*
3609  * btrfs_inc_block_group_ro() returns -ENOSPC when it
3610  * failed in creating a new chunk for metadata.
3611  * It is not a problem for scrub/replace, because
3612  * metadata is always COWed, and our scrub paused
3613  * commit_transactions.
3614  */
3615 ro_set = 0;
3616 } else {
3617 btrfs_warn(fs_info,
3618 "failed setting block group ro: %d", ret);
3619 btrfs_put_block_group(cache);
3620 break;
3621 }
3622
3623 down_write(&fs_info->dev_replace.rwsem);
3624 dev_replace->cursor_right = found_key.offset + length;
3625 dev_replace->cursor_left = found_key.offset;
3626 dev_replace->item_needs_writeback = 1;
3627 up_write(&dev_replace->rwsem);
3628
3629 ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
3630 found_key.offset, cache);
3631
3632
3633 /*
3634  * Flush and submit all pending read and write bios, afterwards
3635  * wait for them.
3636  * Note that in the dev-replace case, a read request causes write
3637  * requests that are submitted in the read completion worker.
3638  * Therefore, in the current situation, it is required that all
3639  * write requests are flushed, so that all read and write requests
3640  * are really completed when bios_in_flight changes to 0.
3641  */
3642 sctx->flush_all_writes = true;
3643 scrub_submit(sctx);
3644 mutex_lock(&sctx->wr_lock);
3645 scrub_wr_submit(sctx);
3646 mutex_unlock(&sctx->wr_lock);
3647
3648 wait_event(sctx->list_wait,
3649 atomic_read(&sctx->bios_in_flight) == 0);
3650
3651 scrub_pause_on(fs_info);
3652
3653 /*
3654  * This wait must happen before we decrease @scrub_paused
3655  * (i.e. before scrub_pause_off()), so that we do not block a
3656  * transaction commit while waiting for the pending workers.
3657  */
3658 wait_event(sctx->list_wait,
3659 atomic_read(&sctx->workers_pending) == 0);
3660 sctx->flush_all_writes = false;
3661
3662 scrub_pause_off(fs_info);
3663
3664 down_write(&fs_info->dev_replace.rwsem);
3665 dev_replace->cursor_left = dev_replace->cursor_right;
3666 dev_replace->item_needs_writeback = 1;
3667 up_write(&fs_info->dev_replace.rwsem);
3668
3669 if (ro_set)
3670 btrfs_dec_block_group_ro(cache);
3671
3672 /*
3673  * We might have prevented the cleaner kthread from deleting
3674  * this block group if it was already unused because we raced
3675  * and set it to RO mode first. So add it back to the unused
3676  * list, otherwise it might not ever be deleted unless a manual
3677  * balance is triggered or it becomes used and unused again.
3678  */
3679 spin_lock(&cache->lock);
3680 if (!cache->removed && !cache->ro && cache->reserved == 0 &&
3681 btrfs_block_group_used(&cache->item) == 0) {
3682 spin_unlock(&cache->lock);
3683 btrfs_mark_bg_unused(cache);
3684 } else {
3685 spin_unlock(&cache->lock);
3686 }
3687
3688 btrfs_put_block_group(cache);
3689 if (ret)
3690 break;
3691 if (sctx->is_dev_replace &&
3692 atomic64_read(&dev_replace->num_write_errors) > 0) {
3693 ret = -EIO;
3694 break;
3695 }
3696 if (sctx->stat.malloc_errors > 0) {
3697 ret = -ENOMEM;
3698 break;
3699 }
3700 skip:
3701 key.offset = found_key.offset + length;
3702 btrfs_release_path(path);
3703 }
3704
3705 btrfs_free_path(path);
3706
3707 return ret;
3708 }
3709
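/*
 * Read and verify all superblock copies of the device that fall within its
 * committed size.
 */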
3710 static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
3711 struct btrfs_device *scrub_dev)
3712 {
3713 int i;
3714 u64 bytenr;
3715 u64 gen;
3716 int ret;
3717 struct btrfs_fs_info *fs_info = sctx->fs_info;
3718
3719 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
3720 return -EIO;
3721
3722 /* Seed devices of a new filesystem have their own generation. */
3723 if (scrub_dev->fs_devices != fs_info->fs_devices)
3724 gen = scrub_dev->generation;
3725 else
3726 gen = fs_info->last_trans_committed;
3727
3728 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3729 bytenr = btrfs_sb_offset(i);
3730 if (bytenr + BTRFS_SUPER_INFO_SIZE >
3731 scrub_dev->commit_total_bytes)
3732 break;
3733
3734 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
3735 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
3736 NULL, 1, bytenr);
3737 if (ret)
3738 return ret;
3739 }
3740 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3741
3742 return 0;
3743 }
3744
3745 /*
3746  * Get a reference on the scrub worker queues, creating them on first use.
3747  */
3748 static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
3749 int is_dev_replace)
3750 {
3751 unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
3752 int max_active = fs_info->thread_pool_size;
3753
3754 lockdep_assert_held(&fs_info->scrub_lock);
3755
3756 if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
3757 ASSERT(fs_info->scrub_workers == NULL);
3758 fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub",
3759 flags, is_dev_replace ? 1 : max_active, 4);
3760 if (!fs_info->scrub_workers)
3761 goto fail_scrub_workers;
3762
3763 ASSERT(fs_info->scrub_wr_completion_workers == NULL);
3764 fs_info->scrub_wr_completion_workers =
3765 btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
3766 max_active, 2);
3767 if (!fs_info->scrub_wr_completion_workers)
3768 goto fail_scrub_wr_completion_workers;
3769
3770 ASSERT(fs_info->scrub_parity_workers == NULL);
3771 fs_info->scrub_parity_workers =
3772 btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
3773 max_active, 2);
3774 if (!fs_info->scrub_parity_workers)
3775 goto fail_scrub_parity_workers;
3776
3777 refcount_set(&fs_info->scrub_workers_refcnt, 1);
3778 } else {
3779 refcount_inc(&fs_info->scrub_workers_refcnt);
3780 }
3781 return 0;
3782
3783 fail_scrub_parity_workers:
3784 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
3785 fail_scrub_wr_completion_workers:
3786 btrfs_destroy_workqueue(fs_info->scrub_workers);
3787 fail_scrub_workers:
3788 return -ENOMEM;
3789 }
3790
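/*
 * Entry point for both scrub and dev-replace: validate the size assumptions,
 * set up the scrub context and worker queues, scrub the superblocks (scrub
 * only) and all chunks of the device, then tear the workers down again.
 */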
3791 int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3792 u64 end, struct btrfs_scrub_progress *progress,
3793 int readonly, int is_dev_replace)
3794 {
3795 struct scrub_ctx *sctx;
3796 int ret;
3797 struct btrfs_device *dev;
3798 unsigned int nofs_flag;
3799 struct btrfs_workqueue *scrub_workers = NULL;
3800 struct btrfs_workqueue *scrub_wr_comp = NULL;
3801 struct btrfs_workqueue *scrub_parity = NULL;
3802
3803 if (btrfs_fs_closing(fs_info))
3804 return -EAGAIN;
3805
3806 if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
3807 /*
3808  * In this case scrub is unable to calculate the checksum the way
3809  * it is implemented. Do not handle this situation at all because
3810  * it won't ever happen.
3811  */
3812 btrfs_err(fs_info,
3813 "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
3814 fs_info->nodesize,
3815 BTRFS_STRIPE_LEN);
3816 return -EINVAL;
3817 }
3818
3819 if (fs_info->sectorsize != PAGE_SIZE) {
3820 /* not supported for data w/o checksums */
3821 btrfs_err_rl(fs_info,
3822 "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
3823 fs_info->sectorsize, PAGE_SIZE);
3824 return -EINVAL;
3825 }
3826
3827 if (fs_info->nodesize >
3828 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
3829 fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
3830 /*
3831  * Would exhaust the array bounds of the pagev member in
3832  * struct scrub_block.
3833  */
3834 btrfs_err(fs_info,
3835 "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
3836 fs_info->nodesize,
3837 SCRUB_MAX_PAGES_PER_BLOCK,
3838 fs_info->sectorsize,
3839 SCRUB_MAX_PAGES_PER_BLOCK);
3840 return -EINVAL;
3841 }
3842
3843 /* allocate the scrub context before taking device_list_mutex */
3844 sctx = scrub_setup_ctx(fs_info, is_dev_replace);
3845 if (IS_ERR(sctx))
3846 return PTR_ERR(sctx);
3847
3848 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3849 dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
3850 if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
3851 !is_dev_replace)) {
3852 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3853 ret = -ENODEV;
3854 goto out_free_ctx;
3855 }
3856
3857 if (!is_dev_replace && !readonly &&
3858 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
3859 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3860 btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
3861 rcu_str_deref(dev->name));
3862 ret = -EROFS;
3863 goto out_free_ctx;
3864 }
3865
3866 mutex_lock(&fs_info->scrub_lock);
3867 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
3868 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
3869 mutex_unlock(&fs_info->scrub_lock);
3870 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3871 ret = -EIO;
3872 goto out_free_ctx;
3873 }
3874
3875 down_read(&fs_info->dev_replace.rwsem);
3876 if (dev->scrub_ctx ||
3877 (!is_dev_replace &&
3878 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
3879 up_read(&fs_info->dev_replace.rwsem);
3880 mutex_unlock(&fs_info->scrub_lock);
3881 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3882 ret = -EINPROGRESS;
3883 goto out_free_ctx;
3884 }
3885 up_read(&fs_info->dev_replace.rwsem);
3886
3887 ret = scrub_workers_get(fs_info, is_dev_replace);
3888 if (ret) {
3889 mutex_unlock(&fs_info->scrub_lock);
3890 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3891 goto out_free_ctx;
3892 }
3893
3894 sctx->readonly = readonly;
3895 dev->scrub_ctx = sctx;
3896 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3897
3898 /*
3899  * By checking @scrub_pause_req here, we can avoid a race
3900  * between committing a transaction and scrubbing.
3901  */
3902 __scrub_blocked_if_needed(fs_info);
3903 atomic_inc(&fs_info->scrubs_running);
3904 mutex_unlock(&fs_info->scrub_lock);
3905
3906 /*
3907  * In order to avoid deadlock with reclaim when there is a transaction
3908  * trying to pause scrub, make sure we use GFP_NOFS for all the
3909  * allocations done at scrub_pages() and scrub_pages_for_parity()
3910  * invoked by our callees. The pausing request is done when the
3911  * transaction commit starts, and it blocks the transaction until
3912  * scrub is paused (done at specific points at scrub_stripe() or
3913  * right above before incrementing fs_info->scrubs_running).
3914  */
3915 nofs_flag = memalloc_nofs_save();
3916 if (!is_dev_replace) {
3917 btrfs_info(fs_info, "scrub: started on devid %llu", devid);
3918 /*
3919  * Hold the device list mutex across scrub_supers() so the device
3920  * list and superblock locations stay stable while they are verified.
3921  */
3922 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3923 ret = scrub_supers(sctx, dev);
3924 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3925 }
3926
3927 if (!ret)
3928 ret = scrub_enumerate_chunks(sctx, dev, start, end);
3929 memalloc_nofs_restore(nofs_flag);
3930
3931 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3932 atomic_dec(&fs_info->scrubs_running);
3933 wake_up(&fs_info->scrub_pause_wait);
3934
3935 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
3936
3937 if (progress)
3938 memcpy(progress, &sctx->stat, sizeof(*progress));
3939
3940 if (!is_dev_replace)
3941 btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d",
3942 ret ? "not finished" : "finished", devid, ret);
3943
3944 mutex_lock(&fs_info->scrub_lock);
3945 dev->scrub_ctx = NULL;
3946 if (refcount_dec_and_test(&fs_info->scrub_workers_refcnt)) {
3947 scrub_workers = fs_info->scrub_workers;
3948 scrub_wr_comp = fs_info->scrub_wr_completion_workers;
3949 scrub_parity = fs_info->scrub_parity_workers;
3950
3951 fs_info->scrub_workers = NULL;
3952 fs_info->scrub_wr_completion_workers = NULL;
3953 fs_info->scrub_parity_workers = NULL;
3954 }
3955 mutex_unlock(&fs_info->scrub_lock);
3956
3957 btrfs_destroy_workqueue(scrub_workers);
3958 btrfs_destroy_workqueue(scrub_wr_comp);
3959 btrfs_destroy_workqueue(scrub_parity);
3960 scrub_put_ctx(sctx);
3961
3962 return ret;
3963
3964 out_free_ctx:
3965 scrub_free_ctx(sctx);
3966
3967 return ret;
3968 }
3969
3970 void btrfs_scrub_pause(struct btrfs_fs_info *fs_info)
3971 {
3972 mutex_lock(&fs_info->scrub_lock);
3973 atomic_inc(&fs_info->scrub_pause_req);
3974 while (atomic_read(&fs_info->scrubs_paused) !=
3975 atomic_read(&fs_info->scrubs_running)) {
3976 mutex_unlock(&fs_info->scrub_lock);
3977 wait_event(fs_info->scrub_pause_wait,
3978 atomic_read(&fs_info->scrubs_paused) ==
3979 atomic_read(&fs_info->scrubs_running));
3980 mutex_lock(&fs_info->scrub_lock);
3981 }
3982 mutex_unlock(&fs_info->scrub_lock);
3983 }
3984
3985 void btrfs_scrub_continue(struct btrfs_fs_info *fs_info)
3986 {
3987 atomic_dec(&fs_info->scrub_pause_req);
3988 wake_up(&fs_info->scrub_pause_wait);
3989 }
3990
3991 int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
3992 {
3993 mutex_lock(&fs_info->scrub_lock);
3994 if (!atomic_read(&fs_info->scrubs_running)) {
3995 mutex_unlock(&fs_info->scrub_lock);
3996 return -ENOTCONN;
3997 }
3998
3999 atomic_inc(&fs_info->scrub_cancel_req);
4000 while (atomic_read(&fs_info->scrubs_running)) {
4001 mutex_unlock(&fs_info->scrub_lock);
4002 wait_event(fs_info->scrub_pause_wait,
4003 atomic_read(&fs_info->scrubs_running) == 0);
4004 mutex_lock(&fs_info->scrub_lock);
4005 }
4006 atomic_dec(&fs_info->scrub_cancel_req);
4007 mutex_unlock(&fs_info->scrub_lock);
4008
4009 return 0;
4010 }
4011
4012 int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
4013 {
4014 struct btrfs_fs_info *fs_info = dev->fs_info;
4015 struct scrub_ctx *sctx;
4016
4017 mutex_lock(&fs_info->scrub_lock);
4018 sctx = dev->scrub_ctx;
4019 if (!sctx) {
4020 mutex_unlock(&fs_info->scrub_lock);
4021 return -ENOTCONN;
4022 }
4023 atomic_inc(&sctx->cancel_req);
4024 while (dev->scrub_ctx) {
4025 mutex_unlock(&fs_info->scrub_lock);
4026 wait_event(fs_info->scrub_pause_wait,
4027 dev->scrub_ctx == NULL);
4028 mutex_lock(&fs_info->scrub_lock);
4029 }
4030 mutex_unlock(&fs_info->scrub_lock);
4031
4032 return 0;
4033 }
4034
4035 int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
4036 struct btrfs_scrub_progress *progress)
4037 {
4038 struct btrfs_device *dev;
4039 struct scrub_ctx *sctx = NULL;
4040
4041 mutex_lock(&fs_info->fs_devices->device_list_mutex);
4042 dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
4043 if (dev)
4044 sctx = dev->scrub_ctx;
4045 if (sctx)
4046 memcpy(progress, &sctx->stat, sizeof(*progress));
4047 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4048
4049 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
4050 }
4051
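/*
 * For dev-replace, remap a logical extent to the physical location, device
 * and mirror that the data should actually be read from.
 */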
4052 static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
4053 u64 extent_logical, u64 extent_len,
4054 u64 *extent_physical,
4055 struct btrfs_device **extent_dev,
4056 int *extent_mirror_num)
4057 {
4058 u64 mapped_length;
4059 struct btrfs_bio *bbio = NULL;
4060 int ret;
4061
4062 mapped_length = extent_len;
4063 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
4064 &mapped_length, &bbio, 0);
4065 if (ret || !bbio || mapped_length < extent_len ||
4066 !bbio->stripes[0].dev->bdev) {
4067 btrfs_put_bbio(bbio);
4068 return;
4069 }
4070
4071 *extent_physical = bbio->stripes[0].physical;
4072 *extent_mirror_num = bbio->mirror_num;
4073 *extent_dev = bbio->stripes[0].dev;
4074 btrfs_put_bbio(bbio);
4075 }