// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/semaphore.h>
#include <linux/uuid.h>
#include <linux/list_sort.h>
#include "misc.h"
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "dev-replace.h"
#include "sysfs.h"
#include "tree-checker.h"
#include "space-info.h"
#include "block-group.h"
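
/*
 * Table of RAID profile attributes used throughout the allocator and
 * balance code.  ncopies is the number of complete copies of the data
 * (raid1/raid10/dup store 2), nparity the number of parity stripes
 * (raid5: 1, raid6: 2), tolerated_failures how many devices may be lost
 * without losing data, and devs_max == 0 means no upper device limit.
 */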
const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10] = {
		.sub_stripes = 2,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 4,
		.tolerated_failures = 1,
		.devs_increment = 2,
		.ncopies = 2,
		.nparity = 0,
		.raid_name = "raid10",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID10,
		.mindev_error = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
	},
	[BTRFS_RAID_RAID1] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 2,
		.devs_min = 2,
		.tolerated_failures = 1,
		.devs_increment = 2,
		.ncopies = 2,
		.nparity = 0,
		.raid_name = "raid1",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID1,
		.mindev_error = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
	},
	[BTRFS_RAID_DUP] = {
		.sub_stripes = 1,
		.dev_stripes = 2,
		.devs_max = 1,
		.devs_min = 1,
		.tolerated_failures = 0,
		.devs_increment = 1,
		.ncopies = 2,
		.nparity = 0,
		.raid_name = "dup",
		.bg_flag = BTRFS_BLOCK_GROUP_DUP,
		.mindev_error = 0,
	},
	[BTRFS_RAID_RAID0] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 2,
		.tolerated_failures = 0,
		.devs_increment = 1,
		.ncopies = 1,
		.nparity = 0,
		.raid_name = "raid0",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID0,
		.mindev_error = 0,
	},
	[BTRFS_RAID_SINGLE] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 1,
		.devs_min = 1,
		.tolerated_failures = 0,
		.devs_increment = 1,
		.ncopies = 1,
		.nparity = 0,
		.raid_name = "single",
		.bg_flag = 0,
		.mindev_error = 0,
	},
	[BTRFS_RAID_RAID5] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 2,
		.tolerated_failures = 1,
		.devs_increment = 1,
		.ncopies = 1,
		.nparity = 1,
		.raid_name = "raid5",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID5,
		.mindev_error = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
	},
	[BTRFS_RAID_RAID6] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 3,
		.tolerated_failures = 2,
		.devs_increment = 1,
		.ncopies = 1,
		.nparity = 2,
		.raid_name = "raid6",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID6,
		.mindev_error = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
	},
};

const char *btrfs_bg_type_to_raid_name(u64 flags)
{
	const int index = btrfs_bg_flags_to_raid_index(flags);

	if (index >= BTRFS_NR_RAID_TYPES)
		return NULL;

	return btrfs_raid_array[index].raid_name;
}
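
/*
 * Fill @buf with a textual description of @bg_flags.  At most @size_buf
 * bytes are used and the result is NUL terminated; any flags left over
 * after the known profile names are printed as a hex value.
 */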
void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
{
	int i;
	int ret;
	char *bp = buf;
	u64 flags = bg_flags;
	u32 size_bp = size_buf;

	if (!flags) {
		strcpy(bp, "NONE");
		return;
	}

#define DESCRIBE_FLAG(flag, desc)					\
	do {								\
		if (flags & (flag)) {					\
			ret = snprintf(bp, size_bp, "%s|", (desc));	\
			if (ret < 0 || ret >= size_bp)			\
				goto out_overflow;			\
			size_bp -= ret;					\
			bp += ret;					\
			flags &= ~(flag);				\
		}							\
	} while (0)

	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_DATA, "data");
	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_SYSTEM, "system");
	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_METADATA, "metadata");

	DESCRIBE_FLAG(BTRFS_AVAIL_ALLOC_BIT_SINGLE, "single");
	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		DESCRIBE_FLAG(btrfs_raid_array[i].bg_flag,
			      btrfs_raid_array[i].raid_name);
#undef DESCRIBE_FLAG

	if (flags) {
		ret = snprintf(bp, size_bp, "0x%llx|", flags);
		size_bp -= ret;
	}

	if (size_bp < size_buf)
		buf[size_buf - size_bp - 1] = '\0'; /* remove last | */

	/*
	 * The text is trimmed; it's up to the caller to provide a
	 * sufficiently large buffer.
	 */
out_overflow:;
}

static int init_first_rw_device(struct btrfs_trans_handle *trans);
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
			     enum btrfs_map_op op,
			     u64 logical, u64 *length,
			     struct btrfs_bio **bbio_ret,
			     int mirror_num, int need_raid_map);
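
/*
 * uuid_mutex protects the global list of registered filesystems (fs_uuids)
 * and is held while scanning, opening and closing devices; each
 * filesystem's own device list is additionally protected by
 * btrfs_fs_devices::device_list_mutex.
 */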
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
struct list_head *btrfs_get_fs_uuids(void)
{
	return &fs_uuids;
}
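
/*
 * alloc_fs_devices - allocate a struct btrfs_fs_devices
 * @fsid:		if not NULL, copied to the new struct's fsid
 * @metadata_fsid:	if not NULL, copied to metadata_uuid; when NULL the
 *			fsid (if given) is used for metadata_uuid as well
 *
 * Return: a pointer to a new struct btrfs_fs_devices on success, or
 * ERR_PTR(-ENOMEM); never NULL.
 */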
static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
						 const u8 *metadata_fsid)
{
	struct btrfs_fs_devices *fs_devs;

	fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
	if (!fs_devs)
		return ERR_PTR(-ENOMEM);

	mutex_init(&fs_devs->device_list_mutex);

	INIT_LIST_HEAD(&fs_devs->devices);
	INIT_LIST_HEAD(&fs_devs->alloc_list);
	INIT_LIST_HEAD(&fs_devs->fs_list);
	if (fsid)
		memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);

	if (metadata_fsid)
		memcpy(fs_devs->metadata_uuid, metadata_fsid, BTRFS_FSID_SIZE);
	else if (fsid)
		memcpy(fs_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE);

	return fs_devs;
}

void btrfs_free_device(struct btrfs_device *device)
{
	WARN_ON(!list_empty(&device->post_commit_list));
	rcu_string_free(device->name);
	extent_io_tree_release(&device->alloc_state);
	bio_put(device->flush_bio);
	kfree(device);
}

static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device;
	WARN_ON(fs_devices->opened);
	while (!list_empty(&fs_devices->devices)) {
		device = list_entry(fs_devices->devices.next,
				    struct btrfs_device, dev_list);
		list_del(&device->dev_list);
		btrfs_free_device(device);
	}
	kfree(fs_devices);
}

void __exit btrfs_cleanup_fs_uuids(void)
{
	struct btrfs_fs_devices *fs_devices;

	while (!list_empty(&fs_uuids)) {
		fs_devices = list_entry(fs_uuids.next,
					struct btrfs_fs_devices, fs_list);
		list_del(&fs_devices->fs_list);
		free_fs_devices(fs_devices);
	}
}
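
/*
 * Returns a pointer to a new btrfs_device on success or ERR_PTR() on error.
 * The returned struct is not linked onto any lists and must be destroyed
 * with btrfs_free_device().
 */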
static struct btrfs_device *__alloc_device(void)
{
	struct btrfs_device *dev;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	/*
	 * Preallocate a bio that's always going to be used for flushing
	 * device barriers and matches the device lifespan.
	 */
	dev->flush_bio = bio_alloc_bioset(GFP_KERNEL, 0, NULL);
	if (!dev->flush_bio) {
		kfree(dev);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&dev->dev_list);
	INIT_LIST_HEAD(&dev->dev_alloc_list);
	INIT_LIST_HEAD(&dev->post_commit_list);

	spin_lock_init(&dev->io_lock);

	atomic_set(&dev->reada_in_flight, 0);
	atomic_set(&dev->dev_stats_ccnt, 0);
	btrfs_device_data_ordered_init(dev);
	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
	extent_io_tree_init(NULL, &dev->alloc_state, 0, NULL);

	return dev;
}

static noinline struct btrfs_fs_devices *find_fsid(
		const u8 *fsid, const u8 *metadata_fsid)
{
	struct btrfs_fs_devices *fs_devices;

	ASSERT(fsid);

	if (metadata_fsid) {
		/*
		 * Handle a scanned device that completed its fsid change but
		 * belongs to a fs_devices created by first scanning a device
		 * which didn't have its fsid/metadata_uuid changed at all and
		 * has the CHANGING_FSID_V2 flag set.
		 */
		list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
			if (fs_devices->fsid_change &&
			    memcmp(metadata_fsid, fs_devices->fsid,
				   BTRFS_FSID_SIZE) == 0 &&
			    memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
				   BTRFS_FSID_SIZE) == 0) {
				return fs_devices;
			}
		}
		/*
		 * Handle a scanned device that completed its fsid change but
		 * belongs to a fs_devices created by a device that carries an
		 * outdated fsid/metadata_uuid pair and the CHANGING_FSID_V2
		 * flag set.
		 */
		list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
			if (fs_devices->fsid_change &&
			    memcmp(fs_devices->metadata_uuid,
				   fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
			    memcmp(metadata_fsid, fs_devices->metadata_uuid,
				   BTRFS_FSID_SIZE) == 0) {
				return fs_devices;
			}
		}
	}

	/* Handle non-split brain cases */
	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (metadata_fsid) {
			if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0
			    && memcmp(metadata_fsid, fs_devices->metadata_uuid,
				      BTRFS_FSID_SIZE) == 0)
				return fs_devices;
		} else {
			if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
				return fs_devices;
		}
	}
	return NULL;
}

static int
btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
		      int flush, struct block_device **bdev,
		      struct buffer_head **bh)
{
	int ret;

	*bdev = blkdev_get_by_path(device_path, flags, holder);

	if (IS_ERR(*bdev)) {
		ret = PTR_ERR(*bdev);
		goto error;
	}

	if (flush)
		filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
	ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE);
	if (ret) {
		blkdev_put(*bdev, flags);
		goto error;
	}
	invalidate_bdev(*bdev);
	*bh = btrfs_read_dev_super(*bdev);
	if (IS_ERR(*bh)) {
		ret = PTR_ERR(*bh);
		blkdev_put(*bdev, flags);
		goto error;
	}

	return 0;

error:
	*bdev = NULL;
	*bh = NULL;
	return ret;
}

static void requeue_list(struct btrfs_pending_bios *pending_bios,
			 struct bio *head, struct bio *tail)
{
	struct bio *old_head;

	old_head = pending_bios->head;
	pending_bios->head = head;
	if (pending_bios->tail)
		tail->bi_next = old_head;
	else
		pending_bios->tail = tail;
}
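
/*
 * Collect pending bios for a device so we don't get a large number of
 * processes sending bios down to the same device.  This greatly improves
 * the schedulability of bios.  The device is periodically handed back to
 * the worker threads so other devices get a fair share.
 */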
static noinline void run_scheduled_bios(struct btrfs_device *device)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct bio *pending;
	struct backing_dev_info *bdi;
	struct btrfs_pending_bios *pending_bios;
	struct bio *tail;
	struct bio *cur;
	int again = 0;
	unsigned long num_run;
	unsigned long batch_run = 0;
	unsigned long last_waited = 0;
	int force_reg = 0;
	int sync_pending = 0;
	struct blk_plug plug;

	/*
	 * This function runs all the bios we've collected for a particular
	 * device.  We don't want to wander off to another device without
	 * first sending all of these down, so set up a plug here and finish
	 * it off before we return.
	 */
	blk_start_plug(&plug);

	bdi = device->bdev->bd_bdi;

loop:
	spin_lock(&device->io_lock);

loop_lock:
	num_run = 0;

	/*
	 * Take all the bios off the list at once and process them later on
	 * (without the lock held).  Sync bios get priority over the regular
	 * list.
	 */
	if (!force_reg && device->pending_sync_bios.head) {
		pending_bios = &device->pending_sync_bios;
		force_reg = 1;
	} else {
		pending_bios = &device->pending_bios;
		force_reg = 0;
	}

	pending = pending_bios->head;
	tail = pending_bios->tail;
	WARN_ON(pending && !tail);

	/*
	 * If pending was null this time around, no bios need processing at
	 * all and we can stop.  Otherwise it'll loop back up again and do an
	 * additional check so no bios are missed.
	 *
	 * device->running_pending is used to synchronize with the
	 * schedule_bio code.
	 */
	if (device->pending_sync_bios.head == NULL &&
	    device->pending_bios.head == NULL) {
		again = 0;
		device->running_pending = 0;
	} else {
		again = 1;
		device->running_pending = 1;
	}

	pending_bios->head = NULL;
	pending_bios->tail = NULL;

	spin_unlock(&device->io_lock);

	while (pending) {

		rmb();
		/*
		 * We want to work on both lists, but do more bios on the
		 * sync list than the regular list.
		 */
		if ((num_run > 32 &&
		     pending_bios != &device->pending_sync_bios &&
		     device->pending_sync_bios.head) ||
		    (num_run > 64 && pending_bios == &device->pending_sync_bios &&
		     device->pending_bios.head)) {
			spin_lock(&device->io_lock);
			requeue_list(pending_bios, pending, tail);
			goto loop_lock;
		}

		cur = pending;
		pending = pending->bi_next;
		cur->bi_next = NULL;

		BUG_ON(atomic_read(&cur->__bi_cnt) == 0);

		/*
		 * If we're doing the sync list, record that our plug has
		 * some sync requests on it.
		 *
		 * If we're doing the regular list and there are sync
		 * requests sitting around, unplug before we add more.
		 */
		if (pending_bios == &device->pending_sync_bios) {
			sync_pending = 1;
		} else if (sync_pending) {
			blk_finish_plug(&plug);
			blk_start_plug(&plug);
			sync_pending = 0;
		}

		btrfsic_submit_bio(cur);
		num_run++;
		batch_run++;

		cond_resched();

		/*
		 * We made progress, there is more work to do and the bdi
		 * is now congested.  Back off and let other work structs
		 * run instead.
		 */
		if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
		    fs_info->fs_devices->open_devices > 1) {
			struct io_context *ioc;

			ioc = current->io_context;

			/*
			 * The main goal here is not to block if we're going
			 * to be able to submit more requests without
			 * blocking, so peek at the io_context batching state
			 * to decide whether this task can keep submitting.
			 */
			if (ioc && ioc->nr_batch_requests > 0 &&
			    time_before(jiffies, ioc->last_waited + HZ/50UL) &&
			    (last_waited == 0 ||
			     ioc->last_waited == last_waited)) {
				/*
				 * We want to go through our batch of
				 * requests and stop, so copy out
				 * ioc->last_waited and test against it
				 * before looping.
				 */
				last_waited = ioc->last_waited;
				cond_resched();
				continue;
			}
			spin_lock(&device->io_lock);
			requeue_list(pending_bios, pending, tail);
			device->running_pending = 1;

			spin_unlock(&device->io_lock);
			btrfs_queue_work(fs_info->submit_workers,
					 &device->work);
			goto done;
		}
	}

	cond_resched();
	if (again)
		goto loop;

	spin_lock(&device->io_lock);
	if (device->pending_bios.head || device->pending_sync_bios.head)
		goto loop_lock;
	spin_unlock(&device->io_lock);

done:
	blk_finish_plug(&plug);
}

static void pending_bios_fn(struct btrfs_work *work)
{
	struct btrfs_device *device;

	device = container_of(work, struct btrfs_device, work);
	run_scheduled_bios(device);
}

static bool device_path_matched(const char *path, struct btrfs_device *device)
{
	int found;

	rcu_read_lock();
	found = strcmp(rcu_str_deref(device->name), path);
	rcu_read_unlock();

	return found == 0;
}
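
/*
 * Search and remove all stale devices (devices which are not mounted).
 * When both arguments are NULL, all stale devices are released.
 * @path:	 optional, release only unmounted devices matching this path
 * @skip_device: optional, skip this device when searching for stale devices
 *
 * Return: 0 for success or if @path is NULL, -EBUSY if @path is a mounted
 * device, -ENOENT if @path does not match any device in the list.
 */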
static int btrfs_free_stale_devices(const char *path,
				    struct btrfs_device *skip_device)
{
	struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
	struct btrfs_device *device, *tmp_device;
	int ret = 0;

	if (path)
		ret = -ENOENT;

	list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {

		mutex_lock(&fs_devices->device_list_mutex);
		list_for_each_entry_safe(device, tmp_device,
					 &fs_devices->devices, dev_list) {
			if (skip_device && skip_device == device)
				continue;
			if (path && !device->name)
				continue;
			if (path && !device_path_matched(path, device))
				continue;
			if (fs_devices->opened) {
				/* for an already deleted device return 0 */
				if (path && ret != 0)
					ret = -EBUSY;
				break;
			}

			/* delete the stale device */
			fs_devices->num_devices--;
			list_del(&device->dev_list);
			btrfs_free_device(device);

			ret = 0;
			if (fs_devices->num_devices == 0)
				break;
		}
		mutex_unlock(&fs_devices->device_list_mutex);

		if (fs_devices->num_devices == 0) {
			btrfs_sysfs_remove_fsid(fs_devices);
			list_del(&fs_devices->fs_list);
			free_fs_devices(fs_devices);
		}
	}

	return ret;
}

static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
				 struct btrfs_device *device, fmode_t flags,
				 void *holder)
{
	struct request_queue *q;
	struct block_device *bdev;
	struct buffer_head *bh;
	struct btrfs_super_block *disk_super;
	u64 devid;
	int ret;

	if (device->bdev)
		return -EINVAL;
	if (!device->name)
		return -EINVAL;

	ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
				    &bdev, &bh);
	if (ret)
		return ret;

	disk_super = (struct btrfs_super_block *)bh->b_data;
	devid = btrfs_stack_device_id(&disk_super->dev_item);
	if (devid != device->devid)
		goto error_brelse;

	if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
		goto error_brelse;

	device->generation = btrfs_super_generation(disk_super);

	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
		if (btrfs_super_incompat_flags(disk_super) &
		    BTRFS_FEATURE_INCOMPAT_METADATA_UUID) {
			pr_err(
		"BTRFS: Invalid seeding and uuid-changed device detected\n");
			goto error_brelse;
		}

		clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
		fs_devices->seeding = 1;
	} else {
		if (bdev_read_only(bdev))
			clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
		else
			set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
	}

	q = bdev_get_queue(bdev);
	if (!blk_queue_nonrot(q))
		fs_devices->rotating = 1;

	device->bdev = bdev;
	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	device->mode = flags;

	fs_devices->open_devices++;
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
		fs_devices->rw_devices++;
		list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
	}
	brelse(bh);

	return 0;

error_brelse:
	brelse(bh);
	blkdev_put(bdev, flags);

	return -EINVAL;
}
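
/*
 * Handle a scanned device that has its CHANGING_FSID_V2 flag set but no
 * metadata_uuid: such a device may belong to a filesystem that has already
 * completed its fsid change, in which case the device's (old) fsid matches
 * the metadata_uuid of the registered fs_devices.
 */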
static struct btrfs_fs_devices *find_fsid_inprogress(
					struct btrfs_super_block *disk_super)
{
	struct btrfs_fs_devices *fs_devices;

	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
			   BTRFS_FSID_SIZE) != 0 &&
		    memcmp(fs_devices->metadata_uuid, disk_super->fsid,
			   BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change) {
			return fs_devices;
		}
	}

	return NULL;
}

static struct btrfs_fs_devices *find_fsid_changed(
					struct btrfs_super_block *disk_super)
{
	struct btrfs_fs_devices *fs_devices;

	/*
	 * Handle the case where the scanned device is part of an fs that had
	 * multiple successful changes of FSID but currently the device
	 * didn't observe it.  Meaning our fsid will be different than
	 * theirs.  Two subcases need handling:
	 * 1 - The fs still keeps different METADATA/FSID uuids.
	 * 2 - The fs was switched back to its original FSID (FSID/METADATA
	 *     are equal).
	 */
	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		/* Changed UUIDs */
		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
			   BTRFS_FSID_SIZE) != 0 &&
		    memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
			   BTRFS_FSID_SIZE) == 0 &&
		    memcmp(fs_devices->fsid, disk_super->fsid,
			   BTRFS_FSID_SIZE) != 0)
			return fs_devices;

		/* Unchanged UUIDs */
		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
			   BTRFS_FSID_SIZE) == 0 &&
		    memcmp(fs_devices->fsid, disk_super->metadata_uuid,
			   BTRFS_FSID_SIZE) == 0)
			return fs_devices;
	}

	return NULL;
}

static struct btrfs_fs_devices *find_fsid_reverted_metadata(
				struct btrfs_super_block *disk_super)
{
	struct btrfs_fs_devices *fs_devices;

	/*
	 * Handle the case where the scanned device is part of an fs whose
	 * last metadata UUID change reverted it to the original FSID.  At
	 * the same time fs_devices was first created by another constituent
	 * device which didn't fully observe the operation.  This results in
	 * an fs_devices with metadata/fsid different AND
	 * btrfs_fs_devices::fsid_change set AND the metadata_uuid of the
	 * fs_devices equal to the fsid of the disk.
	 */
	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
			   BTRFS_FSID_SIZE) != 0 &&
		    memcmp(fs_devices->metadata_uuid, disk_super->fsid,
			   BTRFS_FSID_SIZE) == 0 &&
		    fs_devices->fsid_change)
			return fs_devices;
	}

	return NULL;
}
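
/*
 * Add a new device to the list of registered devices.
 *
 * Returns:
 * device pointer which was just added or updated when successful
 * error pointer when failed
 */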
static noinline struct btrfs_device *device_list_add(const char *path,
			   struct btrfs_super_block *disk_super,
			   bool *new_device_added)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices = NULL;
	struct rcu_string *name;
	u64 found_transid = btrfs_super_generation(disk_super);
	u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
	bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
		BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
	bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
					BTRFS_SUPER_FLAG_CHANGING_FSID_V2);

	if (fsid_change_in_progress) {
		if (!has_metadata_uuid) {
			/*
			 * When we have an image which has CHANGING_FSID_V2
			 * set it might belong either to a filesystem which
			 * has disks with completed fsid change or to one with
			 * no UUID changes in effect; handle both.
			 */
			fs_devices = find_fsid_inprogress(disk_super);
			if (!fs_devices)
				fs_devices = find_fsid(disk_super->fsid, NULL);
		} else {
			fs_devices = find_fsid_changed(disk_super);
		}
	} else if (has_metadata_uuid) {
		fs_devices = find_fsid(disk_super->fsid,
				       disk_super->metadata_uuid);
	} else {
		fs_devices = find_fsid_reverted_metadata(disk_super);
		if (!fs_devices)
			fs_devices = find_fsid(disk_super->fsid, NULL);
	}

	if (!fs_devices) {
		if (has_metadata_uuid)
			fs_devices = alloc_fs_devices(disk_super->fsid,
						      disk_super->metadata_uuid);
		else
			fs_devices = alloc_fs_devices(disk_super->fsid, NULL);

		if (IS_ERR(fs_devices))
			return ERR_CAST(fs_devices);

		fs_devices->fsid_change = fsid_change_in_progress;

		mutex_lock(&fs_devices->device_list_mutex);
		list_add(&fs_devices->fs_list, &fs_uuids);

		device = NULL;
	} else {
		mutex_lock(&fs_devices->device_list_mutex);
		device = btrfs_find_device(fs_devices, devid,
				disk_super->dev_item.uuid, NULL, false);

		/*
		 * If this disk has been pulled into an fs_devices created by
		 * a device which had the CHANGING_FSID_V2 flag, replace the
		 * values in the fs_devices with what's found on disk.
		 */
		if (fs_devices->fsid_change &&
		    found_transid > fs_devices->latest_generation) {
			memcpy(fs_devices->fsid, disk_super->fsid,
					BTRFS_FSID_SIZE);

			if (has_metadata_uuid)
				memcpy(fs_devices->metadata_uuid,
				       disk_super->metadata_uuid,
				       BTRFS_FSID_SIZE);
			else
				memcpy(fs_devices->metadata_uuid,
				       disk_super->fsid, BTRFS_FSID_SIZE);

			fs_devices->fsid_change = false;
		}
	}

	if (!device) {
		if (fs_devices->opened) {
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-EBUSY);
		}

		device = btrfs_alloc_device(NULL, &devid,
					    disk_super->dev_item.uuid);
		if (IS_ERR(device)) {
			mutex_unlock(&fs_devices->device_list_mutex);
			/* we can safely leave the fs_devices entry around */
			return device;
		}

		name = rcu_string_strdup(path, GFP_NOFS);
		if (!name) {
			btrfs_free_device(device);
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-ENOMEM);
		}
		rcu_assign_pointer(device->name, name);

		list_add_rcu(&device->dev_list, &fs_devices->devices);
		fs_devices->num_devices++;

		device->fs_devices = fs_devices;
		*new_device_added = true;

		if (disk_super->label[0])
			pr_info("BTRFS: device label %s devid %llu transid %llu %s\n",
				disk_super->label, devid, found_transid, path);
		else
			pr_info("BTRFS: device fsid %pU devid %llu transid %llu %s\n",
				disk_super->fsid, devid, found_transid, path);

	} else if (!device->name || strcmp(device->name->str, path)) {
		/*
		 * When FS is already mounted.
		 * 1. If you are here and if the device->name is NULL that
		 *    means this device was missing at time of FS mount.
		 * 2. If you are here and if the device->name is different
		 *    from 'path' that means either
		 *      a. The same device disappeared and reappeared with
		 *         different name. or
		 *      b. The missing-disk-which-was-replaced, has
		 *         reappeared now.
		 *
		 * We must allow 1 and 2a above.  But 2b would be a spurious
		 * and unintentional case.
		 *
		 * Further in case of 1 and 2a above, the disk at 'path'
		 * would have missed some transactions when it was away and
		 * in case of 2a the stale bdev has to be updated as well.
		 * 2b must not be allowed at all times.
		 */
		if (!fs_devices->opened && found_transid < device->generation) {
			/*
			 * That is if the FS is _not_ mounted and if you
			 * are here, that means there is more than one
			 * disk with the same uuid and devid.  We keep the
			 * one with the larger generation number or the
			 * last-in if generations are equal.
			 */
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-EEXIST);
		}

		/*
		 * We are going to replace the device path for a given devid;
		 * make sure it's the same device if the device is mounted.
		 */
		if (device->bdev) {
			struct block_device *path_bdev;

			path_bdev = lookup_bdev(path);
			if (IS_ERR(path_bdev)) {
				mutex_unlock(&fs_devices->device_list_mutex);
				return ERR_CAST(path_bdev);
			}

			if (device->bdev != path_bdev) {
				bdput(path_bdev);
				mutex_unlock(&fs_devices->device_list_mutex);
				btrfs_warn_in_rcu(device->fs_info,
			"duplicate device fsid:devid for %pU:%llu old:%s new:%s",
					disk_super->fsid, devid,
					rcu_str_deref(device->name), path);
				return ERR_PTR(-EEXIST);
			}
			bdput(path_bdev);
			btrfs_info_in_rcu(device->fs_info,
				"device fsid %pU devid %llu moved old:%s new:%s",
				disk_super->fsid, devid,
				rcu_str_deref(device->name), path);
		}

		name = rcu_string_strdup(path, GFP_NOFS);
		if (!name) {
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-ENOMEM);
		}
		rcu_string_free(device->name);
		rcu_assign_pointer(device->name, name);
		if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
			fs_devices->missing_devices--;
			clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
		}
	}

	/*
	 * Unmount does not free the btrfs_device struct but would zero
	 * generation along with most of the other members.  So just update
	 * it back.  We need it to pick the disk with the largest generation
	 * (as above).
	 */
	if (!fs_devices->opened) {
		device->generation = found_transid;
		fs_devices->latest_generation = max_t(u64, found_transid,
						fs_devices->latest_generation);
	}

	fs_devices->total_devices = btrfs_super_num_devices(disk_super);

	mutex_unlock(&fs_devices->device_list_mutex);
	return device;
}

static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
{
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_device *device;
	struct btrfs_device *orig_dev;
	int ret = 0;

	fs_devices = alloc_fs_devices(orig->fsid, NULL);
	if (IS_ERR(fs_devices))
		return fs_devices;

	mutex_lock(&orig->device_list_mutex);
	fs_devices->total_devices = orig->total_devices;

	list_for_each_entry(orig_dev, &orig->devices, dev_list) {
		struct rcu_string *name;

		device = btrfs_alloc_device(NULL, &orig_dev->devid,
					    orig_dev->uuid);
		if (IS_ERR(device)) {
			ret = PTR_ERR(device);
			goto error;
		}

		/*
		 * This is ok to do without RCU read locked because we hold
		 * the uuid mutex so nothing we touch in here is going to
		 * disappear.
		 */
		if (orig_dev->name) {
			name = rcu_string_strdup(orig_dev->name->str,
						 GFP_KERNEL);
			if (!name) {
				btrfs_free_device(device);
				ret = -ENOMEM;
				goto error;
			}
			rcu_assign_pointer(device->name, name);
		}

		list_add(&device->dev_list, &fs_devices->devices);
		device->fs_devices = fs_devices;
		fs_devices->num_devices++;
	}
	mutex_unlock(&orig->device_list_mutex);
	return fs_devices;
error:
	mutex_unlock(&orig->device_list_mutex);
	free_fs_devices(fs_devices);
	return ERR_PTR(ret);
}
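
/*
 * After we have read the system tree and know devids belonging to this
 * filesystem, remove the devices which do not belong there.
 */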
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step)
{
	struct btrfs_device *device, *next;
	struct btrfs_device *latest_dev = NULL;

	mutex_lock(&uuid_mutex);
again:
	/* This is the initialized path, it is safe to release the devices. */
	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
		if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
			     &device->dev_state)) {
			if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
				      &device->dev_state) &&
			    (!latest_dev ||
			     device->generation > latest_dev->generation)) {
				latest_dev = device;
			}
			continue;
		}

		if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
			/*
			 * In the first step, keep the device which has the
			 * correct fsid and the devid that is used for the
			 * dev_replace procedure.  In the second step, the
			 * dev_replace state is read from the device tree and
			 * it is known whether the procedure was really
			 * canceled or completed.
			 */
			if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
						  &device->dev_state)) {
				continue;
			}
		}
		if (device->bdev) {
			blkdev_put(device->bdev, device->mode);
			device->bdev = NULL;
			fs_devices->open_devices--;
		}
		if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
			list_del_init(&device->dev_alloc_list);
			clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
			if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
				      &device->dev_state))
				fs_devices->rw_devices--;
		}
		list_del_init(&device->dev_list);
		fs_devices->num_devices--;
		btrfs_free_device(device);
	}

	if (fs_devices->seed) {
		fs_devices = fs_devices->seed;
		goto again;
	}

	fs_devices->latest_bdev = latest_dev->bdev;

	mutex_unlock(&uuid_mutex);
}

static void btrfs_close_bdev(struct btrfs_device *device)
{
	if (!device->bdev)
		return;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		sync_blockdev(device->bdev);
		invalidate_bdev(device->bdev);
	}

	blkdev_put(device->bdev, device->mode);
}

static void btrfs_close_one_device(struct btrfs_device *device)
{
	struct btrfs_fs_devices *fs_devices = device->fs_devices;
	struct btrfs_device *new_device;
	struct rcu_string *name;

	if (device->bdev)
		fs_devices->open_devices--;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
		list_del_init(&device->dev_alloc_list);
		fs_devices->rw_devices--;
	}

	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
		fs_devices->missing_devices--;

	btrfs_close_bdev(device);

	new_device = btrfs_alloc_device(NULL, &device->devid,
					device->uuid);
	BUG_ON(IS_ERR(new_device)); /* -ENOMEM */

	/* Safe because we are under uuid_mutex */
	if (device->name) {
		name = rcu_string_strdup(device->name->str, GFP_NOFS);
		BUG_ON(!name); /* -ENOMEM */
		rcu_assign_pointer(new_device->name, name);
	}

	list_replace_rcu(&device->dev_list, &new_device->dev_list);
	new_device->fs_devices = device->fs_devices;

	synchronize_rcu();
	btrfs_free_device(device);
}

static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device, *tmp;

	if (--fs_devices->opened > 0)
		return 0;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
		btrfs_close_one_device(device);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	WARN_ON(fs_devices->open_devices);
	WARN_ON(fs_devices->rw_devices);
	fs_devices->opened = 0;
	fs_devices->seeding = 0;

	return 0;
}

int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_fs_devices *seed_devices = NULL;
	int ret;

	mutex_lock(&uuid_mutex);
	ret = close_fs_devices(fs_devices);
	if (!fs_devices->opened) {
		seed_devices = fs_devices->seed;
		fs_devices->seed = NULL;
	}
	mutex_unlock(&uuid_mutex);

	while (seed_devices) {
		fs_devices = seed_devices;
		seed_devices = fs_devices->seed;
		close_fs_devices(fs_devices);
		free_fs_devices(fs_devices);
	}
	return ret;
}

static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
			   fmode_t flags, void *holder)
{
	struct btrfs_device *device;
	struct btrfs_device *latest_dev = NULL;
	int ret = 0;

	flags |= FMODE_EXCL;

	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		/* Just open everything we can; ignore failures here */
		if (btrfs_open_one_device(fs_devices, device, flags, holder))
			continue;

		if (!latest_dev ||
		    device->generation > latest_dev->generation)
			latest_dev = device;
	}
	if (fs_devices->open_devices == 0) {
		ret = -EINVAL;
		goto out;
	}
	fs_devices->opened = 1;
	fs_devices->latest_bdev = latest_dev->bdev;
	fs_devices->total_rw_bytes = 0;
out:
	return ret;
}

static int devid_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct btrfs_device *dev1, *dev2;

	dev1 = list_entry(a, struct btrfs_device, dev_list);
	dev2 = list_entry(b, struct btrfs_device, dev_list);

	if (dev1->devid < dev2->devid)
		return -1;
	else if (dev1->devid > dev2->devid)
		return 1;
	return 0;
}

int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
		       fmode_t flags, void *holder)
{
	int ret;

	lockdep_assert_held(&uuid_mutex);

	mutex_lock(&fs_devices->device_list_mutex);
	if (fs_devices->opened) {
		fs_devices->opened++;
		ret = 0;
	} else {
		list_sort(NULL, &fs_devices->devices, devid_cmp);
		ret = open_fs_devices(fs_devices, flags, holder);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	return ret;
}

static void btrfs_release_disk_super(struct page *page)
{
	kunmap(page);
	put_page(page);
}

static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
				 struct page **page,
				 struct btrfs_super_block **disk_super)
{
	void *p;
	pgoff_t index;

	/* make sure our super fits in the device */
	if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
		return 1;

	/* make sure our super fits in the page */
	if (sizeof(**disk_super) > PAGE_SIZE)
		return 1;

	/* make sure our super doesn't straddle pages on disk */
	index = bytenr >> PAGE_SHIFT;
	if ((bytenr + sizeof(**disk_super) - 1) >> PAGE_SHIFT != index)
		return 1;

	/* pull in the page with our super */
	*page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
				    index, GFP_KERNEL);

	if (IS_ERR_OR_NULL(*page))
		return 1;

	p = kmap(*page);

	/* align our pointer to the offset of the super block */
	*disk_super = p + offset_in_page(bytenr);

	if (btrfs_super_bytenr(*disk_super) != bytenr ||
	    btrfs_super_magic(*disk_super) != BTRFS_MAGIC) {
		btrfs_release_disk_super(*page);
		return 1;
	}

	if ((*disk_super)->label[0] &&
	    (*disk_super)->label[BTRFS_LABEL_SIZE - 1])
		(*disk_super)->label[BTRFS_LABEL_SIZE - 1] = '\0';

	return 0;
}

int btrfs_forget_devices(const char *path)
{
	int ret;

	mutex_lock(&uuid_mutex);
	ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
	mutex_unlock(&uuid_mutex);

	return ret;
}

/*
 * Look for a btrfs signature on a device.  This may be called out of the
 * mount path and we are not allowed to call set_blocksize during the scan.
 * The superblock is read via pagecache.
 */
struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
					   void *holder)
{
	struct btrfs_super_block *disk_super;
	bool new_device_added = false;
	struct btrfs_device *device = NULL;
	struct block_device *bdev;
	struct page *page;
	u64 bytenr;

	lockdep_assert_held(&uuid_mutex);

	/*
	 * We would like to check all the supers, but that would make a btrfs
	 * mount succeed after a mkfs from a different FS.  So, we need to add
	 * a special mount option to scan for later supers, using
	 * BTRFS_SUPER_MIRROR_MAX instead.
	 */
	bytenr = btrfs_sb_offset(0);
	flags |= FMODE_EXCL;

	bdev = blkdev_get_by_path(path, flags, holder);
	if (IS_ERR(bdev))
		return ERR_CAST(bdev);

	if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
		device = ERR_PTR(-EINVAL);
		goto error_bdev_put;
	}

	device = device_list_add(path, disk_super, &new_device_added);
	if (!IS_ERR(device)) {
		if (new_device_added)
			btrfs_free_stale_devices(path, device);
	}

	btrfs_release_disk_super(page);

error_bdev_put:
	blkdev_put(bdev, flags);

	return device;
}

/*
 * Try to find a chunk that intersects [start, start + len] range and when
 * one such is found, record the end of it in *start.
 */
static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
				    u64 len)
{
	u64 physical_start, physical_end;

	lockdep_assert_held(&device->fs_info->chunk_mutex);

	if (!find_first_extent_bit(&device->alloc_state, *start,
				   &physical_start, &physical_end,
				   CHUNK_ALLOCATED, NULL)) {

		if (in_range(physical_start, *start, len) ||
		    in_range(*start, physical_start,
			     physical_end - physical_start)) {
			*start = physical_end + 1;
			return true;
		}
	}
	return false;
}
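
/*
 * find_free_dev_extent_start - find free space in the specified device
 * @device:	  the device which we search the free space in
 * @num_bytes:	  the size of the free space that we need
 * @search_start: the position from which to begin the search
 * @start:	  store the start of the free space
 * @len:	  the size of the free space that we found, or the size of
 *		  the largest free space if none is big enough
 *
 * This uses a pretty simple search; the expected time complexity is O(n).
 * The search runs against the commit root of the device tree, with pending
 * (not yet committed) allocations covered by the contains_pending_extent()
 * check.  Returns 0 when a suitable hole was found, -ENOSPC if none is
 * large enough, or another negative errno on lookup failure.
 */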
static int find_free_dev_extent_start(struct btrfs_device *device,
				      u64 num_bytes, u64 search_start,
				      u64 *start, u64 *len)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_key key;
	struct btrfs_dev_extent *dev_extent;
	struct btrfs_path *path;
	u64 hole_size;
	u64 max_hole_start;
	u64 max_hole_size;
	u64 extent_end;
	u64 search_end = device->total_bytes;
	int ret;
	int slot;
	struct extent_buffer *l;

	/*
	 * We don't want to overwrite the superblock on the drive nor any
	 * area used by the boot loader (grub for example), so we make sure
	 * to start at an offset of at least 1MB.
	 */
	search_start = max_t(u64, search_start, SZ_1M);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	max_hole_start = search_start;
	max_hole_size = 0;

again:
	if (search_start >= search_end ||
	    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		ret = -ENOSPC;
		goto out;
	}

	path->reada = READA_FORWARD;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	key.objectid = device->devid;
	key.offset = search_start;
	key.type = BTRFS_DEV_EXTENT_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = btrfs_previous_item(root, path, key.objectid, key.type);
		if (ret < 0)
			goto out;
	}

	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto out;

			break;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		if (key.objectid < device->devid)
			goto next;

		if (key.objectid > device->devid)
			break;

		if (key.type != BTRFS_DEV_EXTENT_KEY)
			goto next;

		if (key.offset > search_start) {
			hole_size = key.offset - search_start;

			/*
			 * Have to check before we set max_hole_start,
			 * otherwise we could end up sending back this offset
			 * anyway.
			 */
			if (contains_pending_extent(device, &search_start,
						    hole_size)) {
				if (key.offset >= search_start)
					hole_size = key.offset - search_start;
				else
					hole_size = 0;
			}

			if (hole_size > max_hole_size) {
				max_hole_start = search_start;
				max_hole_size = hole_size;
			}

			/*
			 * If this free space is greater than what we need, it
			 * must be the max free space that we have found until
			 * now, so max_hole_start must point to the start of
			 * this free space and the length of this free space
			 * is stored in max_hole_size.  Thus, we return
			 * max_hole_start and max_hole_size and go back to the
			 * caller.
			 */
			if (hole_size >= num_bytes) {
				ret = 0;
				goto out;
			}
		}

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		extent_end = key.offset + btrfs_dev_extent_length(l,
								  dev_extent);
		if (extent_end > search_start)
			search_start = extent_end;
next:
		path->slots[0]++;
		cond_resched();
	}

	/*
	 * At this point, search_start should be the end of allocated dev
	 * extents, and when shrinking the device, search_end may be smaller
	 * than search_start.
	 */
	if (search_end > search_start) {
		hole_size = search_end - search_start;

		if (contains_pending_extent(device, &search_start, hole_size)) {
			btrfs_release_path(path);
			goto again;
		}

		if (hole_size > max_hole_size) {
			max_hole_start = search_start;
			max_hole_size = hole_size;
		}
	}

	/* See above. */
	if (max_hole_size < num_bytes)
		ret = -ENOSPC;
	else
		ret = 0;

out:
	btrfs_free_path(path);
	*start = max_hole_start;
	if (len)
		*len = max_hole_size;
	return ret;
}

int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
			 u64 *start, u64 *len)
{
	/* FIXME use last free of some kind */
	return find_free_dev_extent_start(device, num_bytes, 0, start, len);
}

static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
				 struct btrfs_device *device,
				 u64 start, u64 *dev_extent_len)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf = NULL;
	struct btrfs_dev_extent *extent = NULL;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;
again:
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0) {
		ret = btrfs_previous_item(root, path, key.objectid,
					  BTRFS_DEV_EXTENT_KEY);
		if (ret)
			goto out;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
		BUG_ON(found_key.offset > start || found_key.offset +
		       btrfs_dev_extent_length(leaf, extent) < start);
		key = found_key;
		btrfs_release_path(path);
		goto again;
	} else if (ret == 0) {
		leaf = path->nodes[0];
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
	} else {
		btrfs_handle_fs_error(fs_info, ret, "Slot search failed");
		goto out;
	}

	*dev_extent_len = btrfs_dev_extent_length(leaf, extent);

	ret = btrfs_del_item(trans, root, path);
	if (ret) {
		btrfs_handle_fs_error(fs_info, ret,
				      "Failed to remove dev extent item");
	} else {
		set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
	}
out:
	btrfs_free_path(path);
	return ret;
}

static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
				  struct btrfs_device *device,
				  u64 chunk_offset, u64 start, u64 num_bytes)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_dev_extent *extent;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
	WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(*extent));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	extent = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_dev_extent);
	btrfs_set_dev_extent_chunk_tree(leaf, extent,
					BTRFS_CHUNK_TREE_OBJECTID);
	btrfs_set_dev_extent_chunk_objectid(leaf, extent,
					    BTRFS_FIRST_CHUNK_TREE_OBJECTID);
	btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);

	btrfs_set_dev_extent_length(leaf, extent, num_bytes);
	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return ret;
}

static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
{
	struct extent_map_tree *em_tree;
	struct extent_map *em;
	struct rb_node *n;
	u64 ret = 0;

	em_tree = &fs_info->mapping_tree;
	read_lock(&em_tree->lock);
	n = rb_last(&em_tree->map.rb_root);
	if (n) {
		em = rb_entry(n, struct extent_map, rb_node);
		ret = em->start + em->len;
	}
	read_unlock(&em_tree->lock);

	return ret;
}

static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
				    u64 *devid_ret)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
	if (ret < 0)
		goto error;

	if (ret == 0) {
		/* Corruption */
		btrfs_err(fs_info, "corrupted chunk tree devid -1 matched");
		ret = -EUCLEAN;
		goto error;
	}

	ret = btrfs_previous_item(fs_info->chunk_root, path,
				  BTRFS_DEV_ITEMS_OBJECTID,
				  BTRFS_DEV_ITEM_KEY);
	if (ret) {
		*devid_ret = 1;
	} else {
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		*devid_ret = found_key.offset + 1;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}
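
/*
 * The device information is stored in the chunk root; the btrfs_device
 * struct should be fully filled in before this is called.
 */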
static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
			      struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	unsigned long ptr;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
				      &key, sizeof(*dev_item));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_generation(leaf, dev_item, 0);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	btrfs_set_device_total_bytes(leaf, dev_item,
				     btrfs_device_get_disk_total_bytes(device));
	btrfs_set_device_bytes_used(leaf, dev_item,
				    btrfs_device_get_bytes_used(device));
	btrfs_set_device_group(leaf, dev_item, 0);
	btrfs_set_device_seek_speed(leaf, dev_item, 0);
	btrfs_set_device_bandwidth(leaf, dev_item, 0);
	btrfs_set_device_start_offset(leaf, dev_item, 0);

	ptr = btrfs_device_uuid(dev_item);
	write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
	ptr = btrfs_device_fsid(dev_item);
	write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid,
			    ptr, BTRFS_FSID_SIZE);
	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Update ctime/mtime for a given device path; mainly used by ctime/mtime
 * based probing tools such as libblkid.
 */
static void update_dev_time(const char *path_name)
{
	struct file *filp;

	filp = filp_open(path_name, O_RDWR, 0);
	if (IS_ERR(filp))
		return;
	file_update_time(filp);
	filp_close(filp, NULL);
}

static int btrfs_rm_dev_item(struct btrfs_device *device)
{
	struct btrfs_root *root = device->fs_info->chunk_root;
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_trans_handle *trans;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out;
	}

	ret = btrfs_del_item(trans, root, path);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
	}

out:
	btrfs_free_path(path);
	if (!ret)
		ret = btrfs_commit_transaction(trans);
	return ret;
}
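
/*
 * Verify that @num_devices satisfies the RAID profile constraints derived
 * from the filesystem's currently used allocation profiles.  Returns 0 if
 * the device count is sufficient, otherwise the profile's mindev_error.
 */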
static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
					u64 num_devices)
{
	u64 all_avail;
	unsigned seq;
	int i;

	do {
		seq = read_seqbegin(&fs_info->profiles_lock);

		all_avail = fs_info->avail_data_alloc_bits |
			    fs_info->avail_system_alloc_bits |
			    fs_info->avail_metadata_alloc_bits;
	} while (read_seqretry(&fs_info->profiles_lock, seq));

	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
		if (!(all_avail & btrfs_raid_array[i].bg_flag))
			continue;

		if (num_devices < btrfs_raid_array[i].devs_min) {
			int ret = btrfs_raid_array[i].mindev_error;

			if (ret)
				return ret;
		}
	}

	return 0;
}

static struct btrfs_device * btrfs_find_next_active_device(
		struct btrfs_fs_devices *fs_devs, struct btrfs_device *device)
{
	struct btrfs_device *next_device;

	list_for_each_entry(next_device, &fs_devs->devices, dev_list) {
		if (next_device != device &&
		    !test_bit(BTRFS_DEV_STATE_MISSING, &next_device->dev_state)
		    && next_device->bdev)
			return next_device;
	}

	return NULL;
}

/*
 * Helper function to check if the given device is part of s_bdev /
 * latest_bdev and replace it with the provided or the next active device.
 * In the context where this function is called, there should always be
 * another active device (or this_dev) available.
 */
void btrfs_assign_next_active_device(struct btrfs_device *device,
				     struct btrfs_device *this_dev)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_device *next_device;

	if (this_dev)
		next_device = this_dev;
	else
		next_device = btrfs_find_next_active_device(fs_info->fs_devices,
							    device);
	ASSERT(next_device);

	if (fs_info->sb->s_bdev &&
	    (fs_info->sb->s_bdev == device->bdev))
		fs_info->sb->s_bdev = next_device->bdev;

	if (fs_info->fs_devices->latest_bdev == device->bdev)
		fs_info->fs_devices->latest_bdev = next_device->bdev;
}

/*
 * Return btrfs_fs_devices::num_devices excluding the device that's being
 * currently replaced.
 */
static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
{
	u64 num_devices = fs_info->fs_devices->num_devices;

	down_read(&fs_info->dev_replace.rwsem);
	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
		ASSERT(num_devices > 1);
		num_devices--;
	}
	up_read(&fs_info->dev_replace.rwsem);

	return num_devices;
}

int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
		    u64 devid)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *cur_devices;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	u64 num_devices;
	int ret = 0;

	mutex_lock(&uuid_mutex);

	num_devices = btrfs_num_devices(fs_info);

	ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
	if (ret)
		goto out;

	device = btrfs_find_device_by_devspec(fs_info, devid, device_path);

	if (IS_ERR(device)) {
		if (PTR_ERR(device) == -ENOENT &&
		    strcmp(device_path, "missing") == 0)
			ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
		else
			ret = PTR_ERR(device);
		goto out;
	}

	if (btrfs_pinned_by_swapfile(fs_info, device)) {
		btrfs_warn_in_rcu(fs_info,
		  "cannot remove device %s (devid %llu) due to active swapfile",
				  rcu_str_deref(device->name), device->devid);
		ret = -ETXTBSY;
		goto out;
	}

	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		ret = BTRFS_ERROR_DEV_TGT_REPLACE;
		goto out;
	}

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    fs_info->fs_devices->rw_devices == 1) {
		ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
		goto out;
	}

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		mutex_lock(&fs_info->chunk_mutex);
		list_del_init(&device->dev_alloc_list);
		device->fs_devices->rw_devices--;
		mutex_unlock(&fs_info->chunk_mutex);
	}

	mutex_unlock(&uuid_mutex);
	ret = btrfs_shrink_device(device, 0);
	mutex_lock(&uuid_mutex);
	if (ret)
		goto error_undo;

	/*
	 * TODO: the superblock still includes this device in its num_devices
	 * counter although write_all_supers() is not locked out.  This could
	 * give a filesystem state which requires a degraded mount.
	 */
	ret = btrfs_rm_dev_item(device);
	if (ret)
		goto error_undo;

	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	btrfs_scrub_cancel_dev(device);

	/*
	 * The device list mutex makes sure that we don't change the device
	 * list while someone else is writing out all the device supers.
	 * Whoever is writing all supers should lock the device list mutex
	 * before getting the number of devices in the super block
	 * (super_copy).  Conversely, whoever updates the number of devices
	 * in the super block (super_copy) should hold the device list mutex.
	 *
	 * In normal cases cur_devices == fs_devices, but when deleting a
	 * seed device, cur_devices points to the seed's own fs_devices
	 * listed under fs_devices->seed.
	 */
	cur_devices = device->fs_devices;
	mutex_lock(&fs_devices->device_list_mutex);
	list_del_rcu(&device->dev_list);

	cur_devices->num_devices--;
	cur_devices->total_devices--;

	if (cur_devices != fs_devices)
		fs_devices->total_devices--;

	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
		cur_devices->missing_devices--;

	btrfs_assign_next_active_device(device, NULL);

	if (device->bdev) {
		cur_devices->open_devices--;
		/* remove sysfs entry */
		btrfs_sysfs_rm_device_link(fs_devices, device);
	}

	num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
	btrfs_set_super_num_devices(fs_info->super_copy, num_devices);
	mutex_unlock(&fs_devices->device_list_mutex);

	/*
	 * At this point, the device is zero sized and detached from the
	 * devices list.  All that's left is to zero out the old supers and
	 * free the device.
	 */
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
		btrfs_scratch_superblocks(device->bdev, device->name->str);

	btrfs_close_bdev(device);
	synchronize_rcu();
	btrfs_free_device(device);

	if (cur_devices->open_devices == 0) {
		while (fs_devices) {
			if (fs_devices->seed == cur_devices) {
				fs_devices->seed = cur_devices->seed;
				break;
			}
			fs_devices = fs_devices->seed;
		}
		cur_devices->seed = NULL;
		close_fs_devices(cur_devices);
		free_fs_devices(cur_devices);
	}

out:
	mutex_unlock(&uuid_mutex);
	return ret;

error_undo:
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		mutex_lock(&fs_info->chunk_mutex);
		list_add(&device->dev_alloc_list,
			 &fs_devices->alloc_list);
		device->fs_devices->rw_devices++;
		mutex_unlock(&fs_info->chunk_mutex);
	}
	goto out;
}

void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
{
	struct btrfs_fs_devices *fs_devices;

	lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex);

	/*
	 * In case of fs with no seed, srcdev->fs_devices will point to
	 * fs_devices of fs_info.  However when the dev being replaced is a
	 * seed dev it will point to the seed's local fs_devices.  In short,
	 * srcdev will have its correct fs_devices in both the cases.
	 */
	fs_devices = srcdev->fs_devices;

	list_del_rcu(&srcdev->dev_list);
	list_del(&srcdev->dev_alloc_list);
	fs_devices->num_devices--;
	if (test_bit(BTRFS_DEV_STATE_MISSING, &srcdev->dev_state))
		fs_devices->missing_devices--;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state))
		fs_devices->rw_devices--;

	if (srcdev->bdev)
		fs_devices->open_devices--;
}

void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
{
	struct btrfs_fs_info *fs_info = srcdev->fs_info;
	struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
		/* zero out the old super if it is writable */
		btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
	}

	btrfs_close_bdev(srcdev);
	synchronize_rcu();
	btrfs_free_device(srcdev);

	/* if there are no devices left we rather delete the fs_devices */
	if (!fs_devices->num_devices) {
		struct btrfs_fs_devices *tmp_fs_devices;

		/*
		 * On a mounted FS, num_devices cannot be 0 unless it's a
		 * seed.  In case of a seed device being replaced, the
		 * replace target is added to the sprouted FS, so there will
		 * be no more device left under the seed FS.
		 */
		ASSERT(fs_devices->seeding);

		tmp_fs_devices = fs_info->fs_devices;
		while (tmp_fs_devices) {
			if (tmp_fs_devices->seed == fs_devices) {
				tmp_fs_devices->seed = fs_devices->seed;
				break;
			}
			tmp_fs_devices = tmp_fs_devices->seed;
		}
		fs_devices->seed = NULL;
		close_fs_devices(fs_devices);
		free_fs_devices(fs_devices);
	}
}
2353
2354 void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
2355 {
2356 struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;
2357
2358 WARN_ON(!tgtdev);
2359 mutex_lock(&fs_devices->device_list_mutex);
2360
2361 btrfs_sysfs_rm_device_link(fs_devices, tgtdev);
2362
2363 if (tgtdev->bdev)
2364 fs_devices->open_devices--;
2365
2366 fs_devices->num_devices--;
2367
2368 btrfs_assign_next_active_device(tgtdev, NULL);
2369
2370 list_del_rcu(&tgtdev->dev_list);
2371
2372 mutex_unlock(&fs_devices->device_list_mutex);
2373
2374 /*
2375 * The update_dev_time() within btrfs_scratch_superblocks()
2376 * may lead to a call to btrfs_show_devname() which will try
2377 * to hold device_list_mutex. Here this device is already out
2378 * of the device list, so we don't have to hold the
2379 * device_list_mutex lock while scratching the superblocks.
2380 */
2381 btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
2382
2383 btrfs_close_bdev(tgtdev);
2384 synchronize_rcu();
2385 btrfs_free_device(tgtdev);
2386 }
2387
2388 static struct btrfs_device *btrfs_find_device_by_path(
2389 struct btrfs_fs_info *fs_info, const char *device_path)
2390 {
2391 int ret = 0;
2392 struct btrfs_super_block *disk_super;
2393 u64 devid;
2394 u8 *dev_uuid;
2395 struct block_device *bdev;
2396 struct buffer_head *bh;
2397 struct btrfs_device *device;
2398
2399 ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
2400 fs_info->bdev_holder, 0, &bdev, &bh);
2401 if (ret)
2402 return ERR_PTR(ret);
2403 disk_super = (struct btrfs_super_block *)bh->b_data;
2404 devid = btrfs_stack_device_id(&disk_super->dev_item);
2405 dev_uuid = disk_super->dev_item.uuid;
2406 if (btrfs_fs_incompat(fs_info, METADATA_UUID))
2407 device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
2408 disk_super->metadata_uuid, true);
2409 else
2410 device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
2411 disk_super->fsid, true);
2412
2413 brelse(bh);
2414 if (!device)
2415 device = ERR_PTR(-ENOENT);
2416 blkdev_put(bdev, FMODE_READ);
2417 return device;
2418 }
2419
2420 /*
2421 * Lookup a device given by device id, or the path if the id is 0.
2422 */
2423 struct btrfs_device *btrfs_find_device_by_devspec(
2424 struct btrfs_fs_info *fs_info, u64 devid,
2425 const char *device_path)
2426 {
2427 struct btrfs_device *device;
2428
2429 if (devid) {
2430 device = btrfs_find_device(fs_info->fs_devices, devid, NULL,
2431 NULL, true);
2432 if (!device)
2433 return ERR_PTR(-ENOENT);
2434 return device;
2435 }
2436
2437 if (!device_path || !device_path[0])
2438 return ERR_PTR(-EINVAL);
2439
2440 if (strcmp(device_path, "missing") == 0) {
2441 /* Find first missing device */
2442 list_for_each_entry(device, &fs_info->fs_devices->devices,
2443 dev_list) {
2444 if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
2445 &device->dev_state) && !device->bdev)
2446 return device;
2447 }
2448 return ERR_PTR(-ENOENT);
2449 }
2450
2451 return btrfs_find_device_by_path(fs_info, device_path);
2452 }
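/*
 * Usage sketch with hypothetical arguments (device remove resolves its
 * argument through this helper):
 *
 *   btrfs_find_device_by_devspec(fs_info, 2, NULL);       - by devid
 *   btrfs_find_device_by_devspec(fs_info, 0, "missing");  - first device
 *                                                           without a bdev
 *   btrfs_find_device_by_devspec(fs_info, 0, "/dev/sdb"); - by superblock
 *
 * The path form reads the on-disk superblock and matches its devid and
 * uuid/fsid against the mounted filesystem's device list.
 */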
2453
2454 /*
2455 * Does all the dirty work required for changing file system's UUID.
2456 */
2457 static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
2458 {
2459 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2460 struct btrfs_fs_devices *old_devices;
2461 struct btrfs_fs_devices *seed_devices;
2462 struct btrfs_super_block *disk_super = fs_info->super_copy;
2463 struct btrfs_device *device;
2464 u64 super_flags;
2465
2466 lockdep_assert_held(&uuid_mutex);
2467 if (!fs_devices->seeding)
2468 return -EINVAL;
2469
2470 seed_devices = alloc_fs_devices(NULL, NULL);
2471 if (IS_ERR(seed_devices))
2472 return PTR_ERR(seed_devices);
2473
2474 old_devices = clone_fs_devices(fs_devices);
2475 if (IS_ERR(old_devices)) {
2476 kfree(seed_devices);
2477 return PTR_ERR(old_devices);
2478 }
2479
2480 list_add(&old_devices->fs_list, &fs_uuids);
2481
2482 memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
2483 seed_devices->opened = 1;
2484 INIT_LIST_HEAD(&seed_devices->devices);
2485 INIT_LIST_HEAD(&seed_devices->alloc_list);
2486 mutex_init(&seed_devices->device_list_mutex);
2487
2488 mutex_lock(&fs_devices->device_list_mutex);
2489 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
2490 synchronize_rcu);
2491 list_for_each_entry(device, &seed_devices->devices, dev_list)
2492 device->fs_devices = seed_devices;
2493
2494 mutex_lock(&fs_info->chunk_mutex);
2495 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
2496 mutex_unlock(&fs_info->chunk_mutex);
2497
2498 fs_devices->seeding = 0;
2499 fs_devices->num_devices = 0;
2500 fs_devices->open_devices = 0;
2501 fs_devices->missing_devices = 0;
2502 fs_devices->rotating = 0;
2503 fs_devices->seed = seed_devices;
2504
2505 generate_random_uuid(fs_devices->fsid);
2506 memcpy(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE);
2507 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
2508 mutex_unlock(&fs_devices->device_list_mutex);
2509
2510 super_flags = btrfs_super_flags(disk_super) &
2511 ~BTRFS_SUPER_FLAG_SEEDING;
2512 btrfs_set_super_flags(disk_super, super_flags);
2513
2514 return 0;
2515 }
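/*
 * Resulting structure, sketched: after btrfs_prepare_sprout() the
 * original read-only devices hang off a new seed_devices struct while
 * fs_devices keeps the mounted identity under a freshly generated fsid:
 *
 *   fs_devices (new fsid, empty lists) --seed--> seed_devices (old devs)
 *
 * The caller (btrfs_init_new_device) then adds the writable device to
 * fs_devices, so new writes land on the sprout while the seed stays
 * untouched and shareable.
 */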
2516
2517 /*
2518 * Store the expected generation for seed devices in device items.
2519 */
2520 static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
2521 {
2522 struct btrfs_fs_info *fs_info = trans->fs_info;
2523 struct btrfs_root *root = fs_info->chunk_root;
2524 struct btrfs_path *path;
2525 struct extent_buffer *leaf;
2526 struct btrfs_dev_item *dev_item;
2527 struct btrfs_device *device;
2528 struct btrfs_key key;
2529 u8 fs_uuid[BTRFS_FSID_SIZE];
2530 u8 dev_uuid[BTRFS_UUID_SIZE];
2531 u64 devid;
2532 int ret;
2533
2534 path = btrfs_alloc_path();
2535 if (!path)
2536 return -ENOMEM;
2537
2538 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2539 key.offset = 0;
2540 key.type = BTRFS_DEV_ITEM_KEY;
2541
2542 while (1) {
2543 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2544 if (ret < 0)
2545 goto error;
2546
2547 leaf = path->nodes[0];
2548 next_slot:
2549 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
2550 ret = btrfs_next_leaf(root, path);
2551 if (ret > 0)
2552 break;
2553 if (ret < 0)
2554 goto error;
2555 leaf = path->nodes[0];
2556 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2557 btrfs_release_path(path);
2558 continue;
2559 }
2560
2561 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2562 if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
2563 key.type != BTRFS_DEV_ITEM_KEY)
2564 break;
2565
2566 dev_item = btrfs_item_ptr(leaf, path->slots[0],
2567 struct btrfs_dev_item);
2568 devid = btrfs_device_id(leaf, dev_item);
2569 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
2570 BTRFS_UUID_SIZE);
2571 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
2572 BTRFS_FSID_SIZE);
2573 device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
2574 fs_uuid, true);
2575 BUG_ON(!device);
2576
2577 if (device->fs_devices->seeding) {
2578 btrfs_set_device_generation(leaf, dev_item,
2579 device->generation);
2580 btrfs_mark_buffer_dirty(leaf);
2581 }
2582
2583 path->slots[0]++;
2584 goto next_slot;
2585 }
2586 ret = 0;
2587 error:
2588 btrfs_free_path(path);
2589 return ret;
2590 }
2591
2592 int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
2593 {
2594 struct btrfs_root *root = fs_info->dev_root;
2595 struct request_queue *q;
2596 struct btrfs_trans_handle *trans;
2597 struct btrfs_device *device;
2598 struct block_device *bdev;
2599 struct super_block *sb = fs_info->sb;
2600 struct rcu_string *name;
2601 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2602 u64 orig_super_total_bytes;
2603 u64 orig_super_num_devices;
2604 int seeding_dev = 0;
2605 int ret = 0;
2606 bool unlocked = false;
2607
2608 if (sb_rdonly(sb) && !fs_devices->seeding)
2609 return -EROFS;
2610
2611 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
2612 fs_info->bdev_holder);
2613 if (IS_ERR(bdev))
2614 return PTR_ERR(bdev);
2615
2616 if (fs_devices->seeding) {
2617 seeding_dev = 1;
2618 down_write(&sb->s_umount);
2619 mutex_lock(&uuid_mutex);
2620 }
2621
2622 filemap_write_and_wait(bdev->bd_inode->i_mapping);
2623
2624 mutex_lock(&fs_devices->device_list_mutex);
2625 list_for_each_entry(device, &fs_devices->devices, dev_list) {
2626 if (device->bdev == bdev) {
2627 ret = -EEXIST;
2628 mutex_unlock(
2629 &fs_devices->device_list_mutex);
2630 goto error;
2631 }
2632 }
2633 mutex_unlock(&fs_devices->device_list_mutex);
2634
2635 device = btrfs_alloc_device(fs_info, NULL, NULL);
2636 if (IS_ERR(device)) {
2637 /* we can safely leave the fs_devices entry around */
2638 ret = PTR_ERR(device);
2639 goto error;
2640 }
2641
2642 name = rcu_string_strdup(device_path, GFP_KERNEL);
2643 if (!name) {
2644 ret = -ENOMEM;
2645 goto error_free_device;
2646 }
2647 rcu_assign_pointer(device->name, name);
2648
2649 trans = btrfs_start_transaction(root, 0);
2650 if (IS_ERR(trans)) {
2651 ret = PTR_ERR(trans);
2652 goto error_free_device;
2653 }
2654
2655 q = bdev_get_queue(bdev);
2656 set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
2657 device->generation = trans->transid;
2658 device->io_width = fs_info->sectorsize;
2659 device->io_align = fs_info->sectorsize;
2660 device->sector_size = fs_info->sectorsize;
2661 device->total_bytes = round_down(i_size_read(bdev->bd_inode),
2662 fs_info->sectorsize);
2663 device->disk_total_bytes = device->total_bytes;
2664 device->commit_total_bytes = device->total_bytes;
2665 device->fs_info = fs_info;
2666 device->bdev = bdev;
2667 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
2668 clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
2669 device->mode = FMODE_EXCL;
2670 device->dev_stats_valid = 1;
2671 set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
2672
2673 if (seeding_dev) {
2674 sb->s_flags &= ~SB_RDONLY;
2675 ret = btrfs_prepare_sprout(fs_info);
2676 if (ret) {
2677 btrfs_abort_transaction(trans, ret);
2678 goto error_trans;
2679 }
2680 }
2681
2682 device->fs_devices = fs_devices;
2683
2684 mutex_lock(&fs_devices->device_list_mutex);
2685 mutex_lock(&fs_info->chunk_mutex);
2686 list_add_rcu(&device->dev_list, &fs_devices->devices);
2687 list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
2688 fs_devices->num_devices++;
2689 fs_devices->open_devices++;
2690 fs_devices->rw_devices++;
2691 fs_devices->total_devices++;
2692 fs_devices->total_rw_bytes += device->total_bytes;
2693
2694 atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
2695
2696 if (!blk_queue_nonrot(q))
2697 fs_devices->rotating = 1;
2698
2699 orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
2700 btrfs_set_super_total_bytes(fs_info->super_copy,
2701 round_down(orig_super_total_bytes + device->total_bytes,
2702 fs_info->sectorsize));
2703
2704 orig_super_num_devices = btrfs_super_num_devices(fs_info->super_copy);
2705 btrfs_set_super_num_devices(fs_info->super_copy,
2706 orig_super_num_devices + 1);
2707
2708 /* add sysfs device entry */
2709 btrfs_sysfs_add_device_link(fs_devices, device);
2710
2711 /*
2712 * we've got more storage, clear any full flags on the space
2713 * infos
2714 */
2715 btrfs_clear_space_info_full(fs_info);
2716
2717 mutex_unlock(&fs_info->chunk_mutex);
2718 mutex_unlock(&fs_devices->device_list_mutex);
2719
2720 if (seeding_dev) {
2721 mutex_lock(&fs_info->chunk_mutex);
2722 ret = init_first_rw_device(trans);
2723 mutex_unlock(&fs_info->chunk_mutex);
2724 if (ret) {
2725 btrfs_abort_transaction(trans, ret);
2726 goto error_sysfs;
2727 }
2728 }
2729
2730 ret = btrfs_add_dev_item(trans, device);
2731 if (ret) {
2732 btrfs_abort_transaction(trans, ret);
2733 goto error_sysfs;
2734 }
2735
2736 if (seeding_dev) {
2737 ret = btrfs_finish_sprout(trans);
2738 if (ret) {
2739 btrfs_abort_transaction(trans, ret);
2740 goto error_sysfs;
2741 }
2742
2743 btrfs_sysfs_update_sprout_fsid(fs_devices,
2744 fs_info->fs_devices->fsid);
2745 }
2746
2747 ret = btrfs_commit_transaction(trans);
2748
2749 if (seeding_dev) {
2750 mutex_unlock(&uuid_mutex);
2751 up_write(&sb->s_umount);
2752 unlocked = true;
2753
2754 if (ret)
2755 return ret;
2756
2757 ret = btrfs_relocate_sys_chunks(fs_info);
2758 if (ret < 0)
2759 btrfs_handle_fs_error(fs_info, ret,
2760 "Failed to relocate sys chunks after device initialization. This can be fixed using the \"btrfs balance\" command.");
2761 trans = btrfs_attach_transaction(root);
2762 if (IS_ERR(trans)) {
2763 if (PTR_ERR(trans) == -ENOENT)
2764 return 0;
2765 ret = PTR_ERR(trans);
2766 trans = NULL;
2767 goto error_sysfs;
2768 }
2769 ret = btrfs_commit_transaction(trans);
2770 }
2771
2772 /* Update ctime/mtime for libblkid */
2773 update_dev_time(device_path);
2774 return ret;
2775
2776 error_sysfs:
2777 btrfs_sysfs_rm_device_link(fs_devices, device);
2778 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2779 mutex_lock(&fs_info->chunk_mutex);
2780 list_del_rcu(&device->dev_list);
2781 list_del(&device->dev_alloc_list);
2782 fs_info->fs_devices->num_devices--;
2783 fs_info->fs_devices->open_devices--;
2784 fs_info->fs_devices->rw_devices--;
2785 fs_info->fs_devices->total_devices--;
2786 fs_info->fs_devices->total_rw_bytes -= device->total_bytes;
2787 atomic64_sub(device->total_bytes, &fs_info->free_chunk_space);
2788 btrfs_set_super_total_bytes(fs_info->super_copy,
2789 orig_super_total_bytes);
2790 btrfs_set_super_num_devices(fs_info->super_copy,
2791 orig_super_num_devices);
2792 mutex_unlock(&fs_info->chunk_mutex);
2793 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2794 error_trans:
2795 if (seeding_dev)
2796 sb->s_flags |= SB_RDONLY;
2797 if (trans)
2798 btrfs_end_transaction(trans);
2799 error_free_device:
2800 btrfs_free_device(device);
2801 error:
2802 blkdev_put(bdev, FMODE_EXCL);
2803 if (seeding_dev && !unlocked) {
2804 mutex_unlock(&uuid_mutex);
2805 up_write(&sb->s_umount);
2806 }
2807 return ret;
2808 }
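/*
 * Flow of btrfs_init_new_device() in brief: open the bdev exclusively,
 * reject duplicates, allocate and size the btrfs_device, sprout if the
 * filesystem is a seed, publish the device on the lists and super block
 * counters, then persist the dev item (and seed generations) in one
 * transaction. Each error label unwinds exactly what was set up before
 * it: sysfs link, list membership, super block totals, the transaction,
 * and finally the device and bdev references.
 */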
2809
2810 static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
2811 struct btrfs_device *device)
2812 {
2813 int ret;
2814 struct btrfs_path *path;
2815 struct btrfs_root *root = device->fs_info->chunk_root;
2816 struct btrfs_dev_item *dev_item;
2817 struct extent_buffer *leaf;
2818 struct btrfs_key key;
2819
2820 path = btrfs_alloc_path();
2821 if (!path)
2822 return -ENOMEM;
2823
2824 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2825 key.type = BTRFS_DEV_ITEM_KEY;
2826 key.offset = device->devid;
2827
2828 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2829 if (ret < 0)
2830 goto out;
2831
2832 if (ret > 0) {
2833 ret = -ENOENT;
2834 goto out;
2835 }
2836
2837 leaf = path->nodes[0];
2838 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
2839
2840 btrfs_set_device_id(leaf, dev_item, device->devid);
2841 btrfs_set_device_type(leaf, dev_item, device->type);
2842 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
2843 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
2844 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
2845 btrfs_set_device_total_bytes(leaf, dev_item,
2846 btrfs_device_get_disk_total_bytes(device));
2847 btrfs_set_device_bytes_used(leaf, dev_item,
2848 btrfs_device_get_bytes_used(device));
2849 btrfs_mark_buffer_dirty(leaf);
2850
2851 out:
2852 btrfs_free_path(path);
2853 return ret;
2854 }
2855
2856 int btrfs_grow_device(struct btrfs_trans_handle *trans,
2857 struct btrfs_device *device, u64 new_size)
2858 {
2859 struct btrfs_fs_info *fs_info = device->fs_info;
2860 struct btrfs_super_block *super_copy = fs_info->super_copy;
2861 u64 old_total;
2862 u64 diff;
2863
2864 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
2865 return -EACCES;
2866
2867 new_size = round_down(new_size, fs_info->sectorsize);
2868
2869 mutex_lock(&fs_info->chunk_mutex);
2870 old_total = btrfs_super_total_bytes(super_copy);
2871 diff = round_down(new_size - device->total_bytes, fs_info->sectorsize);
2872
2873 if (new_size <= device->total_bytes ||
2874 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
2875 mutex_unlock(&fs_info->chunk_mutex);
2876 return -EINVAL;
2877 }
2878
2879 btrfs_set_super_total_bytes(super_copy,
2880 round_down(old_total + diff, fs_info->sectorsize));
2881 device->fs_devices->total_rw_bytes += diff;
2882
2883 btrfs_device_set_total_bytes(device, new_size);
2884 btrfs_device_set_disk_total_bytes(device, new_size);
2885 btrfs_clear_space_info_full(device->fs_info);
2886 if (list_empty(&device->post_commit_list))
2887 list_add_tail(&device->post_commit_list,
2888 &trans->transaction->dev_update_list);
2889 mutex_unlock(&fs_info->chunk_mutex);
2890
2891 return btrfs_update_device(trans, device);
2892 }
2893
2894 static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
2895 {
2896 struct btrfs_fs_info *fs_info = trans->fs_info;
2897 struct btrfs_root *root = fs_info->chunk_root;
2898 int ret;
2899 struct btrfs_path *path;
2900 struct btrfs_key key;
2901
2902 path = btrfs_alloc_path();
2903 if (!path)
2904 return -ENOMEM;
2905
2906 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2907 key.offset = chunk_offset;
2908 key.type = BTRFS_CHUNK_ITEM_KEY;
2909
2910 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2911 if (ret < 0)
2912 goto out;
2913 else if (ret > 0) {
2914 btrfs_handle_fs_error(fs_info, -ENOENT,
2915 "Failed lookup while freeing chunk.");
2916 ret = -ENOENT;
2917 goto out;
2918 }
2919
2920 ret = btrfs_del_item(trans, root, path);
2921 if (ret < 0)
2922 btrfs_handle_fs_error(fs_info, ret,
2923 "Failed to delete chunk item.");
2924 out:
2925 btrfs_free_path(path);
2926 return ret;
2927 }
2928
2929 static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
2930 {
2931 struct btrfs_super_block *super_copy = fs_info->super_copy;
2932 struct btrfs_disk_key *disk_key;
2933 struct btrfs_chunk *chunk;
2934 u8 *ptr;
2935 int ret = 0;
2936 u32 num_stripes;
2937 u32 array_size;
2938 u32 len = 0;
2939 u32 cur;
2940 struct btrfs_key key;
2941
2942 mutex_lock(&fs_info->chunk_mutex);
2943 array_size = btrfs_super_sys_array_size(super_copy);
2944
2945 ptr = super_copy->sys_chunk_array;
2946 cur = 0;
2947
2948 while (cur < array_size) {
2949 disk_key = (struct btrfs_disk_key *)ptr;
2950 btrfs_disk_key_to_cpu(&key, disk_key);
2951
2952 len = sizeof(*disk_key);
2953
2954 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
2955 chunk = (struct btrfs_chunk *)(ptr + len);
2956 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
2957 len += btrfs_chunk_item_size(num_stripes);
2958 } else {
2959 ret = -EIO;
2960 break;
2961 }
2962 if (key.objectid == BTRFS_FIRST_CHUNK_TREE_OBJECTID &&
2963 key.offset == chunk_offset) {
2964 memmove(ptr, ptr + len, array_size - (cur + len));
2965 array_size -= len;
2966 btrfs_set_super_sys_array_size(super_copy, array_size);
2967 } else {
2968 ptr += len;
2969 cur += len;
2970 }
2971 }
2972 mutex_unlock(&fs_info->chunk_mutex);
2973 return ret;
2974 }
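/*
 * For reference, the layout walked above: sys_chunk_array is a packed
 * sequence of (key, chunk) pairs, each chunk carrying a variable number
 * of trailing stripes:
 *
 *   [disk_key][btrfs_chunk + (num_stripes - 1) extra stripes][disk_key]...
 *
 * so one entry spans sizeof(*disk_key) + btrfs_chunk_item_size(num_stripes)
 * bytes and deleting it is a memmove() of everything behind it.
 */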
2975
2976 /*
2977 * Find the mapping containing the given logical extent.
2978 * @logical: Logical block offset in bytes.
2979 * @length: Length of extent in bytes.
2980 *
2981 * Return: Chunk mapping or ERR_PTR.
2982 */
2983 struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
2984 u64 logical, u64 length)
2985 {
2986 struct extent_map_tree *em_tree;
2987 struct extent_map *em;
2988
2989 em_tree = &fs_info->mapping_tree;
2990 read_lock(&em_tree->lock);
2991 em = lookup_extent_mapping(em_tree, logical, length);
2992 read_unlock(&em_tree->lock);
2993
2994 if (!em) {
2995 btrfs_crit(fs_info, "unable to find logical %llu length %llu",
2996 logical, length);
2997 return ERR_PTR(-EINVAL);
2998 }
2999
3000 if (em->start > logical || em->start + em->len < logical) {
3001 btrfs_crit(fs_info,
3002 "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
3003 logical, length, em->start, em->start + em->len);
3004 free_extent_map(em);
3005 return ERR_PTR(-EINVAL);
3006 }
3007
3008 /* callers are responsible for dropping em's ref. */
3009 return em;
3010 }
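/*
 * Caller-side sketch (error handling trimmed): the map is reference
 * counted, so each successful lookup must be paired with a
 * free_extent_map() when done:
 *
 *   em = btrfs_get_chunk_map(fs_info, logical, length);
 *   if (IS_ERR(em))
 *           return PTR_ERR(em);
 *   map = em->map_lookup;
 *   ...
 *   free_extent_map(em);
 */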
3011
3012 int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
3013 {
3014 struct btrfs_fs_info *fs_info = trans->fs_info;
3015 struct extent_map *em;
3016 struct map_lookup *map;
3017 u64 dev_extent_len = 0;
3018 int i, ret = 0;
3019 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
3020
3021 em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
3022 if (IS_ERR(em)) {
3023 /*
3024 * This is a logic error, but we don't want to just rely on the
3025 * user having built with ASSERT enabled, so if ASSERT doesn't
3026 * do anything we still error out.
3027 */
3028 ASSERT(0);
3029 return PTR_ERR(em);
3030 }
3031 map = em->map_lookup;
3032 mutex_lock(&fs_info->chunk_mutex);
3033 check_system_chunk(trans, map->type);
3034 mutex_unlock(&fs_info->chunk_mutex);
3035
3036 /*
3037 * Take the device list mutex to prevent races with the final phase of
3038 * a device replace operation that replaces the device object associated
3039 * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
3040 */
3041 mutex_lock(&fs_devices->device_list_mutex);
3042 for (i = 0; i < map->num_stripes; i++) {
3043 struct btrfs_device *device = map->stripes[i].dev;
3044 ret = btrfs_free_dev_extent(trans, device,
3045 map->stripes[i].physical,
3046 &dev_extent_len);
3047 if (ret) {
3048 mutex_unlock(&fs_devices->device_list_mutex);
3049 btrfs_abort_transaction(trans, ret);
3050 goto out;
3051 }
3052
3053 if (device->bytes_used > 0) {
3054 mutex_lock(&fs_info->chunk_mutex);
3055 btrfs_device_set_bytes_used(device,
3056 device->bytes_used - dev_extent_len);
3057 atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
3058 btrfs_clear_space_info_full(fs_info);
3059 mutex_unlock(&fs_info->chunk_mutex);
3060 }
3061
3062 ret = btrfs_update_device(trans, device);
3063 if (ret) {
3064 mutex_unlock(&fs_devices->device_list_mutex);
3065 btrfs_abort_transaction(trans, ret);
3066 goto out;
3067 }
3068 }
3069 mutex_unlock(&fs_devices->device_list_mutex);
3070
3071 ret = btrfs_free_chunk(trans, chunk_offset);
3072 if (ret) {
3073 btrfs_abort_transaction(trans, ret);
3074 goto out;
3075 }
3076
3077 trace_btrfs_chunk_free(fs_info, map, chunk_offset, em->len);
3078
3079 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
3080 ret = btrfs_del_sys_chunk(fs_info, chunk_offset);
3081 if (ret) {
3082 btrfs_abort_transaction(trans, ret);
3083 goto out;
3084 }
3085 }
3086
3087 ret = btrfs_remove_block_group(trans, chunk_offset, em);
3088 if (ret) {
3089 btrfs_abort_transaction(trans, ret);
3090 goto out;
3091 }
3092
3093 out:
3094 /* once for us */
3095 free_extent_map(em);
3096 return ret;
3097 }
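/*
 * Teardown order above, roughly the reverse of allocation: free each
 * stripe's dev extent and update the owning device item, delete the
 * chunk item, drop the sys_chunk_array copy for SYSTEM chunks, and
 * finally remove the block group. check_system_chunk() runs first
 * because deleting these items itself modifies the chunk tree and may
 * need a system chunk allocation.
 */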
3098
3099 static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
3100 {
3101 struct btrfs_root *root = fs_info->chunk_root;
3102 struct btrfs_trans_handle *trans;
3103 int ret;
3104
3105 /*
3106 * Prevent races with automatic removal of unused block groups.
3107 * After we relocate and before we remove the chunk with offset
3108 * chunk_offset, automatic removal of the block group can kick in,
3109 * resulting in a failure when calling btrfs_remove_chunk() below.
3110 *
3111 * Make sure to acquire this mutex before doing a tree search (dev
3112 * or chunk trees) to find chunks. Otherwise the cleaner kthread might
3113 * call btrfs_remove_chunk() (via btrfs_delete_unused_bgs()) after
3114 * we release the path used to search the chunk/dev tree and before
3115 * we acquire the delete_unused_bgs_mutex.
3116 */
3117 lockdep_assert_held(&fs_info->delete_unused_bgs_mutex);
3118
3119 /* step one, relocate all the extents inside this chunk */
3120 btrfs_scrub_pause(fs_info);
3121 ret = btrfs_relocate_block_group(fs_info, chunk_offset);
3122 btrfs_scrub_continue(fs_info);
3123 if (ret)
3124 return ret;
3125
3126 trans = btrfs_start_trans_remove_block_group(root->fs_info,
3127 chunk_offset);
3128 if (IS_ERR(trans)) {
3129 ret = PTR_ERR(trans);
3130 btrfs_handle_fs_error(root->fs_info, ret, NULL);
3131 return ret;
3132 }
3133
3134 /*
3135 * step two, delete the device extents and the
3136 * chunk tree entries
3137 */
3138 ret = btrfs_remove_chunk(trans, chunk_offset);
3139 btrfs_end_transaction(trans);
3140 return ret;
3141 }
3142
3143 static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
3144 {
3145 struct btrfs_root *chunk_root = fs_info->chunk_root;
3146 struct btrfs_path *path;
3147 struct extent_buffer *leaf;
3148 struct btrfs_chunk *chunk;
3149 struct btrfs_key key;
3150 struct btrfs_key found_key;
3151 u64 chunk_type;
3152 bool retried = false;
3153 int failed = 0;
3154 int ret;
3155
3156 path = btrfs_alloc_path();
3157 if (!path)
3158 return -ENOMEM;
3159
3160 again:
3161 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3162 key.offset = (u64)-1;
3163 key.type = BTRFS_CHUNK_ITEM_KEY;
3164
3165 while (1) {
3166 mutex_lock(&fs_info->delete_unused_bgs_mutex);
3167 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
3168 if (ret < 0) {
3169 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3170 goto error;
3171 }
3172 BUG_ON(ret == 0);
3173
3174 ret = btrfs_previous_item(chunk_root, path, key.objectid,
3175 key.type);
3176 if (ret)
3177 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3178 if (ret < 0)
3179 goto error;
3180 if (ret > 0)
3181 break;
3182
3183 leaf = path->nodes[0];
3184 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3185
3186 chunk = btrfs_item_ptr(leaf, path->slots[0],
3187 struct btrfs_chunk);
3188 chunk_type = btrfs_chunk_type(leaf, chunk);
3189 btrfs_release_path(path);
3190
3191 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
3192 ret = btrfs_relocate_chunk(fs_info, found_key.offset);
3193 if (ret == -ENOSPC)
3194 failed++;
3195 else
3196 BUG_ON(ret);
3197 }
3198 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3199
3200 if (found_key.offset == 0)
3201 break;
3202 key.offset = found_key.offset - 1;
3203 }
3204 ret = 0;
3205 if (failed && !retried) {
3206 failed = 0;
3207 retried = true;
3208 goto again;
3209 } else if (WARN_ON(failed && retried)) {
3210 ret = -ENOSPC;
3211 }
3212 error:
3213 btrfs_free_path(path);
3214 return ret;
3215 }
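/*
 * Iteration pattern used above: the chunk tree is walked backwards from
 * offset (u64)-1 via btrfs_previous_item(), releasing the path before
 * each relocation since the tree may change underneath. -ENOSPC failures
 * are only counted and the whole pass is retried once; relocating the
 * other system chunks may have freed space for the stragglers, and only
 * a second-pass failure is returned to the caller.
 */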
3216
3217 /*
3218 * return 1 : allocate a data chunk successfully,
3219 * return <0: errors during allocating a data chunk,
3220 * return 0 : no need to allocate a data chunk.
3221 */
3222 static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
3223 u64 chunk_offset)
3224 {
3225 struct btrfs_block_group_cache *cache;
3226 u64 bytes_used;
3227 u64 chunk_type;
3228
3229 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3230 ASSERT(cache);
3231 chunk_type = cache->flags;
3232 btrfs_put_block_group(cache);
3233
3234 if (chunk_type & BTRFS_BLOCK_GROUP_DATA) {
3235 spin_lock(&fs_info->data_sinfo->lock);
3236 bytes_used = fs_info->data_sinfo->bytes_used;
3237 spin_unlock(&fs_info->data_sinfo->lock);
3238
3239 if (!bytes_used) {
3240 struct btrfs_trans_handle *trans;
3241 int ret;
3242
3243 trans = btrfs_join_transaction(fs_info->tree_root);
3244 if (IS_ERR(trans))
3245 return PTR_ERR(trans);
3246
3247 ret = btrfs_force_chunk_alloc(trans,
3248 BTRFS_BLOCK_GROUP_DATA);
3249 btrfs_end_transaction(trans);
3250 if (ret < 0)
3251 return ret;
3252 return 1;
3253 }
3254 }
3255 return 0;
3256 }
3257
3258 static int insert_balance_item(struct btrfs_fs_info *fs_info,
3259 struct btrfs_balance_control *bctl)
3260 {
3261 struct btrfs_root *root = fs_info->tree_root;
3262 struct btrfs_trans_handle *trans;
3263 struct btrfs_balance_item *item;
3264 struct btrfs_disk_balance_args disk_bargs;
3265 struct btrfs_path *path;
3266 struct extent_buffer *leaf;
3267 struct btrfs_key key;
3268 int ret, err;
3269
3270 path = btrfs_alloc_path();
3271 if (!path)
3272 return -ENOMEM;
3273
3274 trans = btrfs_start_transaction(root, 0);
3275 if (IS_ERR(trans)) {
3276 btrfs_free_path(path);
3277 return PTR_ERR(trans);
3278 }
3279
3280 key.objectid = BTRFS_BALANCE_OBJECTID;
3281 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3282 key.offset = 0;
3283
3284 ret = btrfs_insert_empty_item(trans, root, path, &key,
3285 sizeof(*item));
3286 if (ret)
3287 goto out;
3288
3289 leaf = path->nodes[0];
3290 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
3291
3292 memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
3293
3294 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
3295 btrfs_set_balance_data(leaf, item, &disk_bargs);
3296 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
3297 btrfs_set_balance_meta(leaf, item, &disk_bargs);
3298 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
3299 btrfs_set_balance_sys(leaf, item, &disk_bargs);
3300
3301 btrfs_set_balance_flags(leaf, item, bctl->flags);
3302
3303 btrfs_mark_buffer_dirty(leaf);
3304 out:
3305 btrfs_free_path(path);
3306 err = btrfs_commit_transaction(trans);
3307 if (err && !ret)
3308 ret = err;
3309 return ret;
3310 }
3311
3312 static int del_balance_item(struct btrfs_fs_info *fs_info)
3313 {
3314 struct btrfs_root *root = fs_info->tree_root;
3315 struct btrfs_trans_handle *trans;
3316 struct btrfs_path *path;
3317 struct btrfs_key key;
3318 int ret, err;
3319
3320 path = btrfs_alloc_path();
3321 if (!path)
3322 return -ENOMEM;
3323
3324 trans = btrfs_start_transaction(root, 0);
3325 if (IS_ERR(trans)) {
3326 btrfs_free_path(path);
3327 return PTR_ERR(trans);
3328 }
3329
3330 key.objectid = BTRFS_BALANCE_OBJECTID;
3331 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3332 key.offset = 0;
3333
3334 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3335 if (ret < 0)
3336 goto out;
3337 if (ret > 0) {
3338 ret = -ENOENT;
3339 goto out;
3340 }
3341
3342 ret = btrfs_del_item(trans, root, path);
3343 out:
3344 btrfs_free_path(path);
3345 err = btrfs_commit_transaction(trans);
3346 if (err && !ret)
3347 ret = err;
3348 return ret;
3349 }
3350
3351 /*
3352 * This is a heuristic used to reduce the number of chunks balanced on
3353 * resume after balance was interrupted.
3354 */
3355 static void update_balance_args(struct btrfs_balance_control *bctl)
3356 {
3357 /*
3358 * Turn on soft mode for chunk types that were being converted.
3359 */
3360 if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
3361 bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
3362 if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
3363 bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
3364 if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
3365 bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
3366
3367 /*
3368 * Turn on usage filter if is not already used.  The idea is
3369 * that chunks that we have already balanced should be
3370 * reasonably full.  Don't do it for chunks that are being
3371 * converted - that will keep us from relocating unconverted
3372 * (albeit full) chunks.
3373 */
3374 if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3375 !(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3376 !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3377 bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
3378 bctl->data.usage = 90;
3379 }
3380 if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3381 !(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3382 !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3383 bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
3384 bctl->sys.usage = 90;
3385 }
3386 if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3387 !(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3388 !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3389 bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
3390 bctl->meta.usage = 90;
3391 }
3392 }
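/*
 * Effect sketched with hypothetical resumes: an interrupted
 * "-dconvert=raid1" continues as "-dconvert=raid1,soft", skipping chunks
 * that were already converted, while an interrupted plain "-m" continues
 * as "-musage=90", leaving alone chunks the earlier run already packed
 * above 90% full.
 */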
3393
3394 /*
3395 * Clear the balance status in fs_info and delete the balance item from disk.
3396 */
3397 static void reset_balance_state(struct btrfs_fs_info *fs_info)
3398 {
3399 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3400 int ret;
3401
3402 BUG_ON(!fs_info->balance_ctl);
3403
3404 spin_lock(&fs_info->balance_lock);
3405 fs_info->balance_ctl = NULL;
3406 spin_unlock(&fs_info->balance_lock);
3407
3408 kfree(bctl);
3409 ret = del_balance_item(fs_info);
3410 if (ret)
3411 btrfs_handle_fs_error(fs_info, ret, NULL);
3412 }
3413
3414 /*
3415 * Balance filters.  Return 1 if chunk should be filtered out
3416 * (should not be balanced).
3417 */
3418 static int chunk_profiles_filter(u64 chunk_type,
3419 struct btrfs_balance_args *bargs)
3420 {
3421 chunk_type = chunk_to_extended(chunk_type) &
3422 BTRFS_EXTENDED_PROFILE_MASK;
3423
3424 if (bargs->profiles & chunk_type)
3425 return 0;
3426
3427 return 1;
3428 }
3429
3430 static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
3431 struct btrfs_balance_args *bargs)
3432 {
3433 struct btrfs_block_group_cache *cache;
3434 u64 chunk_used;
3435 u64 user_thresh_min;
3436 u64 user_thresh_max;
3437 int ret = 1;
3438
3439 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3440 chunk_used = btrfs_block_group_used(&cache->item);
3441
3442 if (bargs->usage_min == 0)
3443 user_thresh_min = 0;
3444 else
3445 user_thresh_min = div_factor_fine(cache->key.offset,
3446 bargs->usage_min);
3447
3448 if (bargs->usage_max == 0)
3449 user_thresh_max = 1;
3450 else if (bargs->usage_max > 100)
3451 user_thresh_max = cache->key.offset;
3452 else
3453 user_thresh_max = div_factor_fine(cache->key.offset,
3454 bargs->usage_max);
3455
3456 if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max)
3457 ret = 0;
3458
3459 btrfs_put_block_group(cache);
3460 return ret;
3461 }
3462
3463 static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
3464 u64 chunk_offset, struct btrfs_balance_args *bargs)
3465 {
3466 struct btrfs_block_group_cache *cache;
3467 u64 chunk_used, user_thresh;
3468 int ret = 1;
3469
3470 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3471 chunk_used = btrfs_block_group_used(&cache->item);
3472
3473 if (bargs->usage_min == 0)
3474 user_thresh = 1;
3475 else if (bargs->usage > 100)
3476 user_thresh = cache->key.offset;
3477 else
3478 user_thresh = div_factor_fine(cache->key.offset,
3479 bargs->usage);
3480
3481 if (chunk_used < user_thresh)
3482 ret = 0;
3483
3484 btrfs_put_block_group(cache);
3485 return ret;
3486 }
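/*
 * Worked example with illustrative numbers: for bargs->usage == 30 and a
 * 1 GiB block group, user_thresh = div_factor_fine(1 GiB, 30), i.e.
 * 1073741824 * 30 / 100, roughly 307 MiB. A chunk with 100 MiB used
 * passes the filter (returns 0 and is balanced); one with 500 MiB used
 * is skipped.
 */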
3487
3488 static int chunk_devid_filter(struct extent_buffer *leaf,
3489 struct btrfs_chunk *chunk,
3490 struct btrfs_balance_args *bargs)
3491 {
3492 struct btrfs_stripe *stripe;
3493 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3494 int i;
3495
3496 for (i = 0; i < num_stripes; i++) {
3497 stripe = btrfs_stripe_nr(chunk, i);
3498 if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
3499 return 0;
3500 }
3501
3502 return 1;
3503 }
3504
3505 static u64 calc_data_stripes(u64 type, int num_stripes)
3506 {
3507 const int index = btrfs_bg_flags_to_raid_index(type);
3508 const int ncopies = btrfs_raid_array[index].ncopies;
3509 const int nparity = btrfs_raid_array[index].nparity;
3510
3511 if (nparity)
3512 return num_stripes - nparity;
3513 else
3514 return num_stripes / ncopies;
3515 }
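/*
 * Worked examples, using the profile parameters from btrfs_raid_array:
 * RAID1 has ncopies == 2, so a 2-stripe chunk holds 2 / 2 == 1 data
 * stripe; RAID6 has nparity == 2, so a 6-stripe chunk holds 6 - 2 == 4
 * data stripes; RAID0 has ncopies == 1, so every stripe holds data.
 */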
3516
3517 /* [pstart, pend) */
3518 static int chunk_drange_filter(struct extent_buffer *leaf,
3519 struct btrfs_chunk *chunk,
3520 struct btrfs_balance_args *bargs)
3521 {
3522 struct btrfs_stripe *stripe;
3523 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3524 u64 stripe_offset;
3525 u64 stripe_length;
3526 u64 type;
3527 int factor;
3528 int i;
3529
3530 if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
3531 return 0;
3532
3533 type = btrfs_chunk_type(leaf, chunk);
3534 factor = calc_data_stripes(type, num_stripes);
3535
3536 for (i = 0; i < num_stripes; i++) {
3537 stripe = btrfs_stripe_nr(chunk, i);
3538 if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
3539 continue;
3540
3541 stripe_offset = btrfs_stripe_offset(leaf, stripe);
3542 stripe_length = btrfs_chunk_length(leaf, chunk);
3543 stripe_length = div_u64(stripe_length, factor);
3544
3545 if (stripe_offset < bargs->pend &&
3546 stripe_offset + stripe_length > bargs->pstart)
3547 return 0;
3548 }
3549
3550 return 1;
3551 }
3552
3553 /* [vstart, vend) */
3554 static int chunk_vrange_filter(struct extent_buffer *leaf,
3555 struct btrfs_chunk *chunk,
3556 u64 chunk_offset,
3557 struct btrfs_balance_args *bargs)
3558 {
3559 if (chunk_offset < bargs->vend &&
3560 chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
3561 /* at least part of the chunk is inside this vrange */
3562 return 0;
3563
3564 return 1;
3565 }
3566
3567 static int chunk_stripes_range_filter(struct extent_buffer *leaf,
3568 struct btrfs_chunk *chunk,
3569 struct btrfs_balance_args *bargs)
3570 {
3571 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3572
3573 if (bargs->stripes_min <= num_stripes
3574 && num_stripes <= bargs->stripes_max)
3575 return 0;
3576
3577 return 1;
3578 }
3579
3580 static int chunk_soft_convert_filter(u64 chunk_type,
3581 struct btrfs_balance_args *bargs)
3582 {
3583 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
3584 return 0;
3585
3586 chunk_type = chunk_to_extended(chunk_type) &
3587 BTRFS_EXTENDED_PROFILE_MASK;
3588
3589 if (bargs->target == chunk_type)
3590 return 1;
3591
3592 return 0;
3593 }
3594
3595 static int should_balance_chunk(struct extent_buffer *leaf,
3596 struct btrfs_chunk *chunk, u64 chunk_offset)
3597 {
3598 struct btrfs_fs_info *fs_info = leaf->fs_info;
3599 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3600 struct btrfs_balance_args *bargs = NULL;
3601 u64 chunk_type = btrfs_chunk_type(leaf, chunk);
3602
3603 /* type filter */
3604 if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
3605 (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
3606 return 0;
3607 }
3608
3609 if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
3610 bargs = &bctl->data;
3611 else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
3612 bargs = &bctl->sys;
3613 else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
3614 bargs = &bctl->meta;
3615
3616 /* profiles filter */
3617 if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
3618 chunk_profiles_filter(chunk_type, bargs)) {
3619 return 0;
3620 }
3621
3622 /* usage filter */
3623 if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
3624 chunk_usage_filter(fs_info, chunk_offset, bargs)) {
3625 return 0;
3626 } else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3627 chunk_usage_range_filter(fs_info, chunk_offset, bargs)) {
3628 return 0;
3629 }
3630
3631 /* devid filter */
3632 if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
3633 chunk_devid_filter(leaf, chunk, bargs)) {
3634 return 0;
3635 }
3636
3637 /* drange filter, makes sense only with devid filter */
3638 if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
3639 chunk_drange_filter(leaf, chunk, bargs)) {
3640 return 0;
3641 }
3642
3643 /* vrange filter */
3644 if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
3645 chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
3646 return 0;
3647 }
3648
3649 /* stripes filter */
3650 if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
3651 chunk_stripes_range_filter(leaf, chunk, bargs)) {
3652 return 0;
3653 }
3654
3655 /* soft profile changing mode */
3656 if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
3657 chunk_soft_convert_filter(chunk_type, bargs)) {
3658 return 0;
3659 }
3660
3661 /*
3662 * limited by count, must be the last filter
3663 */
3664 if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
3665 if (bargs->limit == 0)
3666 return 0;
3667 else
3668 bargs->limit--;
3669 } else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
3670 /*
3671 * Same logic as the 'limit' filter; the minimum cannot be
3672 * determined here because we do not have the global information
3673 * about the count of all chunks that satisfy the filters.
3674 */
3675 if (bargs->limit_max == 0)
3676 return 0;
3677 else
3678 bargs->limit_max--;
3679 }
3680
3681 return 1;
3682 }
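/*
 * Filter semantics in brief: each filter above returns 1 to exclude the
 * chunk, so the checks compose as a logical AND with the first rejection
 * winning. The limit filters must remain last because they are stateful:
 * they decrement a per-run counter and would miscount if a later filter
 * could still reject the chunk.
 */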
3683
3684 static int __btrfs_balance(struct btrfs_fs_info *fs_info)
3685 {
3686 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3687 struct btrfs_root *chunk_root = fs_info->chunk_root;
3688 u64 chunk_type;
3689 struct btrfs_chunk *chunk;
3690 struct btrfs_path *path = NULL;
3691 struct btrfs_key key;
3692 struct btrfs_key found_key;
3693 struct extent_buffer *leaf;
3694 int slot;
3695 int ret;
3696 int enospc_errors = 0;
3697 bool counting = true;
3698
3699 u64 limit_data = bctl->data.limit;
3700 u64 limit_meta = bctl->meta.limit;
3701 u64 limit_sys = bctl->sys.limit;
3702 u32 count_data = 0;
3703 u32 count_meta = 0;
3704 u32 count_sys = 0;
3705 int chunk_reserved = 0;
3706
3707 path = btrfs_alloc_path();
3708 if (!path) {
3709 ret = -ENOMEM;
3710 goto error;
3711 }
3712
3713 /* zero out stat counters */
3714 spin_lock(&fs_info->balance_lock);
3715 memset(&bctl->stat, 0, sizeof(bctl->stat));
3716 spin_unlock(&fs_info->balance_lock);
3717 again:
3718 if (!counting) {
3719 /*
3720 * The single value limit and min/max limits use the same bytes
3721 * in the balance args union, so restore the saved values here
3722 * before the relocation pass consumes them again.
3723 bctl->data.limit = limit_data;
3724 bctl->meta.limit = limit_meta;
3725 bctl->sys.limit = limit_sys;
3726 }
3727 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3728 key.offset = (u64)-1;
3729 key.type = BTRFS_CHUNK_ITEM_KEY;
3730
3731 while (1) {
3732 if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
3733 atomic_read(&fs_info->balance_cancel_req)) {
3734 ret = -ECANCELED;
3735 goto error;
3736 }
3737
3738 mutex_lock(&fs_info->delete_unused_bgs_mutex);
3739 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
3740 if (ret < 0) {
3741 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3742 goto error;
3743 }
3744
3745 /*
3746 * this shouldn't happen, it means the last relocate
3747 * failed
3748 */
3749 if (ret == 0)
3750 BUG();
3751
3752 ret = btrfs_previous_item(chunk_root, path, 0,
3753 BTRFS_CHUNK_ITEM_KEY);
3754 if (ret) {
3755 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3756 ret = 0;
3757 break;
3758 }
3759
3760 leaf = path->nodes[0];
3761 slot = path->slots[0];
3762 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3763
3764 if (found_key.objectid != key.objectid) {
3765 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3766 break;
3767 }
3768
3769 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3770 chunk_type = btrfs_chunk_type(leaf, chunk);
3771
3772 if (!counting) {
3773 spin_lock(&fs_info->balance_lock);
3774 bctl->stat.considered++;
3775 spin_unlock(&fs_info->balance_lock);
3776 }
3777
3778 ret = should_balance_chunk(leaf, chunk, found_key.offset);
3779
3780 btrfs_release_path(path);
3781 if (!ret) {
3782 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3783 goto loop;
3784 }
3785
3786 if (counting) {
3787 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3788 spin_lock(&fs_info->balance_lock);
3789 bctl->stat.expected++;
3790 spin_unlock(&fs_info->balance_lock);
3791
3792 if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
3793 count_data++;
3794 else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
3795 count_sys++;
3796 else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
3797 count_meta++;
3798
3799 goto loop;
3800 }
3801
3802 /*
3803 * Apply limit_min filter, no need to check if the LIMITS
3804 * filter is used, limit_min is 0 by default
3805 */
3806 if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
3807 count_data < bctl->data.limit_min)
3808 || ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
3809 count_meta < bctl->meta.limit_min)
3810 || ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
3811 count_sys < bctl->sys.limit_min)) {
3812 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3813 goto loop;
3814 }
3815
3816 if (!chunk_reserved) {
3817 /*
3818 * We may be relocating the only data chunk we have,
3819 * which could potentially end up with losing data's
3820 * raid profile, so lets allocate an empty one in
3821 * advance.
3822 */
3823 ret = btrfs_may_alloc_data_chunk(fs_info,
3824 found_key.offset);
3825 if (ret < 0) {
3826 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3827 goto error;
3828 } else if (ret == 1) {
3829 chunk_reserved = 1;
3830 }
3831 }
3832
3833 ret = btrfs_relocate_chunk(fs_info, found_key.offset);
3834 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3835 if (ret == -ENOSPC) {
3836 enospc_errors++;
3837 } else if (ret == -ETXTBSY) {
3838 btrfs_info(fs_info,
3839 "skipping relocation of block group %llu due to active swapfile",
3840 found_key.offset);
3841 ret = 0;
3842 } else if (ret) {
3843 goto error;
3844 } else {
3845 spin_lock(&fs_info->balance_lock);
3846 bctl->stat.completed++;
3847 spin_unlock(&fs_info->balance_lock);
3848 }
3849 loop:
3850 if (found_key.offset == 0)
3851 break;
3852 key.offset = found_key.offset - 1;
3853 }
3854
3855 if (counting) {
3856 btrfs_release_path(path);
3857 counting = false;
3858 goto again;
3859 }
3860 error:
3861 btrfs_free_path(path);
3862 if (enospc_errors) {
3863 btrfs_info(fs_info, "%d enospc errors during balance",
3864 enospc_errors);
3865 if (!ret)
3866 ret = -ENOSPC;
3867 }
3868
3869 return ret;
3870 }
3871
3872 /*
3873 * alloc_profile_is_valid - see if a given profile is valid and reduced
3874 * @flags: profile to validate
3875 * @extended: if true, @flags is in the extended version of the profile
3876 */
3877 static int alloc_profile_is_valid(u64 flags, int extended)
3878 {
3879 u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
3880 BTRFS_BLOCK_GROUP_PROFILE_MASK);
3881
3882 flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
3883
3884 /* 1) check that all other bits are zeroed */
3885 if (flags & ~mask)
3886 return 0;
3887
3888 /* 2) see if profile is reduced */
3889 if (flags == 0)
3890 return !extended; /* "0" is valid for usual profiles */
3891
3892 /*
3893 * true if exactly one bit is set: flags & (flags - 1) clears the
3894 * lowest set bit, so the expression below is zero exactly when
3895 * no other profile bit was set
3896 */
3897 return flags != 0 && (flags & (flags - 1)) == 0;
3898 }
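/*
 * The single-bit test illustrated: for nonzero x, x & (x - 1) clears the
 * lowest set bit, so the result is zero exactly when one profile bit is
 * set. E.g. flags == RAID1 passes, while flags == (RAID0 | RAID1) leaves
 * the RAID1 bit standing after the AND and is rejected as not reduced.
 */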
3899
3900 static inline int balance_need_close(struct btrfs_fs_info *fs_info)
3901 {
3902 /* cancel requested || normal exit path */
3903 return atomic_read(&fs_info->balance_cancel_req) ||
3904 (atomic_read(&fs_info->balance_pause_req) == 0 &&
3905 atomic_read(&fs_info->balance_cancel_req) == 0);
3906 }
3907
3908 /* Non-zero return value signifies invalidity */
3909 static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
3910 u64 allowed)
3911 {
3912 return ((bctl_arg->flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3913 (!alloc_profile_is_valid(bctl_arg->target, 1) ||
3914 (bctl_arg->target & ~allowed)));
3915 }
3916
3917 /*
3918 * Fill @buf with textual description of balance filter flags @bargs, up to
3919 * @size_buf including the terminating null. The output may be trimmed if it
3920 * does not fit
3921 */
3922 static void describe_balance_args(struct btrfs_balance_args *bargs, char *buf,
3923 u32 size_buf)
3924 {
3925 int ret;
3926 u32 size_bp = size_buf;
3927 char *bp = buf;
3928 u64 flags = bargs->flags;
3929 char tmp_buf[128] = {'\0'};
3930
3931 if (!flags)
3932 return;
3933
3934 #define CHECK_APPEND_NOARG(a) \
3935 do { \
3936 ret = snprintf(bp, size_bp, (a)); \
3937 if (ret < 0 || ret >= size_bp) \
3938 goto out_overflow; \
3939 size_bp -= ret; \
3940 bp += ret; \
3941 } while (0)
3942
3943 #define CHECK_APPEND_1ARG(a, v1) \
3944 do { \
3945 ret = snprintf(bp, size_bp, (a), (v1)); \
3946 if (ret < 0 || ret >= size_bp) \
3947 goto out_overflow; \
3948 size_bp -= ret; \
3949 bp += ret; \
3950 } while (0)
3951
3952 #define CHECK_APPEND_2ARG(a, v1, v2) \
3953 do { \
3954 ret = snprintf(bp, size_bp, (a), (v1), (v2)); \
3955 if (ret < 0 || ret >= size_bp) \
3956 goto out_overflow; \
3957 size_bp -= ret; \
3958 bp += ret; \
3959 } while (0)
3960
3961 if (flags & BTRFS_BALANCE_ARGS_CONVERT)
3962 CHECK_APPEND_1ARG("convert=%s,",
3963 btrfs_bg_type_to_raid_name(bargs->target));
3964
3965 if (flags & BTRFS_BALANCE_ARGS_SOFT)
3966 CHECK_APPEND_NOARG("soft,");
3967
3968 if (flags & BTRFS_BALANCE_ARGS_PROFILES) {
3969 btrfs_describe_block_groups(bargs->profiles, tmp_buf,
3970 sizeof(tmp_buf));
3971 CHECK_APPEND_1ARG("profiles=%s,", tmp_buf);
3972 }
3973
3974 if (flags & BTRFS_BALANCE_ARGS_USAGE)
3975 CHECK_APPEND_1ARG("usage=%llu,", bargs->usage);
3976
3977 if (flags & BTRFS_BALANCE_ARGS_USAGE_RANGE)
3978 CHECK_APPEND_2ARG("usage=%u..%u,",
3979 bargs->usage_min, bargs->usage_max);
3980
3981 if (flags & BTRFS_BALANCE_ARGS_DEVID)
3982 CHECK_APPEND_1ARG("devid=%llu,", bargs->devid);
3983
3984 if (flags & BTRFS_BALANCE_ARGS_DRANGE)
3985 CHECK_APPEND_2ARG("drange=%llu..%llu,",
3986 bargs->pstart, bargs->pend);
3987
3988 if (flags & BTRFS_BALANCE_ARGS_VRANGE)
3989 CHECK_APPEND_2ARG("vrange=%llu..%llu,",
3990 bargs->vstart, bargs->vend);
3991
3992 if (flags & BTRFS_BALANCE_ARGS_LIMIT)
3993 CHECK_APPEND_1ARG("limit=%llu,", bargs->limit);
3994
3995 if (flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)
3996 CHECK_APPEND_2ARG("limit=%u..%u,",
3997 bargs->limit_min, bargs->limit_max);
3998
3999 if (flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE)
4000 CHECK_APPEND_2ARG("stripes=%u..%u,",
4001 bargs->stripes_min, bargs->stripes_max);
4002
4003 #undef CHECK_APPEND_2ARG
4004 #undef CHECK_APPEND_1ARG
4005 #undef CHECK_APPEND_NOARG
4006
4007 out_overflow:
4008
4009 if (size_bp < size_buf)
4010 buf[size_buf - size_bp - 1] = '\0'; /* remove last , */
4011 else
4012 buf[0] = '\0';
4013 }
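/*
 * Example output for hypothetical args: CONVERT to raid1 plus SOFT plus
 * USAGE == 90 appends "convert=raid1,soft,usage=90," and the
 * out_overflow fixup strips the trailing comma, so the status line shows
 * "convert=raid1,soft,usage=90".
 */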
4014
4015 static void describe_balance_start_or_resume(struct btrfs_fs_info *fs_info)
4016 {
4017 u32 size_buf = 1024;
4018 char tmp_buf[192] = {'\0'};
4019 char *buf;
4020 char *bp;
4021 u32 size_bp = size_buf;
4022 int ret;
4023 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
4024
4025 buf = kzalloc(size_buf, GFP_KERNEL);
4026 if (!buf)
4027 return;
4028
4029 bp = buf;
4030
4031 #define CHECK_APPEND_1ARG(a, v1) \
4032 do { \
4033 ret = snprintf(bp, size_bp, (a), (v1)); \
4034 if (ret < 0 || ret >= size_bp) \
4035 goto out_overflow; \
4036 size_bp -= ret; \
4037 bp += ret; \
4038 } while (0)
4039
4040 if (bctl->flags & BTRFS_BALANCE_FORCE)
4041 CHECK_APPEND_1ARG("%s", "-f ");
4042
4043 if (bctl->flags & BTRFS_BALANCE_DATA) {
4044 describe_balance_args(&bctl->data, tmp_buf, sizeof(tmp_buf));
4045 CHECK_APPEND_1ARG("-d%s ", tmp_buf);
4046 }
4047
4048 if (bctl->flags & BTRFS_BALANCE_METADATA) {
4049 describe_balance_args(&bctl->meta, tmp_buf, sizeof(tmp_buf));
4050 CHECK_APPEND_1ARG("-m%s ", tmp_buf);
4051 }
4052
4053 if (bctl->flags & BTRFS_BALANCE_SYSTEM) {
4054 describe_balance_args(&bctl->sys, tmp_buf, sizeof(tmp_buf));
4055 CHECK_APPEND_1ARG("-s%s ", tmp_buf);
4056 }
4057
4058 #undef CHECK_APPEND_1ARG
4059
4060 out_overflow:
4061
4062 if (size_bp < size_buf)
4063 buf[size_buf - size_bp - 1] = '\0'; /* remove last , */
4064 btrfs_info(fs_info, "balance: %s %s",
4065 (bctl->flags & BTRFS_BALANCE_RESUME) ?
4066 "resume" : "start", buf);
4067
4068 kfree(buf);
4069 }
4070
4071 /*
4072 * Should be called with balance mutex held
4073 */
4074 int btrfs_balance(struct btrfs_fs_info *fs_info,
4075 struct btrfs_balance_control *bctl,
4076 struct btrfs_ioctl_balance_args *bargs)
4077 {
4078 u64 meta_target, data_target;
4079 u64 allowed;
4080 int mixed = 0;
4081 int ret;
4082 u64 num_devices;
4083 unsigned seq;
4084 bool reducing_integrity;
4085 int i;
4086
4087 if (btrfs_fs_closing(fs_info) ||
4088 atomic_read(&fs_info->balance_pause_req) ||
4089 atomic_read(&fs_info->balance_cancel_req)) {
4090 ret = -EINVAL;
4091 goto out;
4092 }
4093
4094 allowed = btrfs_super_incompat_flags(fs_info->super_copy);
4095 if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
4096 mixed = 1;
4097
4098 /*
4099 * In case of mixed groups both data and meta should be picked,
4100 * and identical options should be given for both of them.
4101 */
4102 allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA;
4103 if (mixed && (bctl->flags & allowed)) {
4104 if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
4105 !(bctl->flags & BTRFS_BALANCE_METADATA) ||
4106 memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
4107 btrfs_err(fs_info,
4108 "balance: mixed groups data and metadata options must be the same");
4109 ret = -EINVAL;
4110 goto out;
4111 }
4112 }
4113
4114 /*
4115 * rw_devices will not change at the moment, device add/delete/replace
4116 * are excluded by EXCL_OP
4117 */
4118 num_devices = fs_info->fs_devices->rw_devices;
4119
4120 /*
4121 * SINGLE profile on-disk has no profile bit, but in-memory we have a
4122 * special bit for it, to make it easier to distinguish.  Thus we need
4123 * to set it manually, or balance would refuse the profile.
4124 */
4125 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
4126 for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++)
4127 if (num_devices >= btrfs_raid_array[i].devs_min)
4128 allowed |= btrfs_raid_array[i].bg_flag;
4129
4130 if (validate_convert_profile(&bctl->data, allowed)) {
4131 btrfs_err(fs_info,
4132 "balance: invalid convert data profile %s",
4133 btrfs_bg_type_to_raid_name(bctl->data.target));
4134 ret = -EINVAL;
4135 goto out;
4136 }
4137 if (validate_convert_profile(&bctl->meta, allowed)) {
4138 btrfs_err(fs_info,
4139 "balance: invalid convert metadata profile %s",
4140 btrfs_bg_type_to_raid_name(bctl->meta.target));
4141 ret = -EINVAL;
4142 goto out;
4143 }
4144 if (validate_convert_profile(&bctl->sys, allowed)) {
4145 btrfs_err(fs_info,
4146 "balance: invalid convert system profile %s",
4147 btrfs_bg_type_to_raid_name(bctl->sys.target));
4148 ret = -EINVAL;
4149 goto out;
4150 }
4151 /*
4152 * Allow reducing metadata or system integrity only if force is set or
4153 * the target profile still provides redundancy; collect the redundant
4154 * profiles (at least two copies or some parity) into allowed below.
4155 */
4156 allowed = 0;
4157 for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) {
4158 if (btrfs_raid_array[i].ncopies >= 2 ||
4159 btrfs_raid_array[i].tolerated_failures >= 1)
4160 allowed |= btrfs_raid_array[i].bg_flag;
4161 }
4162 do {
4163 seq = read_seqbegin(&fs_info->profiles_lock);
4164
4165 if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
4166 (fs_info->avail_system_alloc_bits & allowed) &&
4167 !(bctl->sys.target & allowed)) ||
4168 ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
4169 (fs_info->avail_metadata_alloc_bits & allowed) &&
4170 !(bctl->meta.target & allowed)))
4171 reducing_integrity = true;
4172 else
4173 reducing_integrity = false;
4174
4175 /* if we're not converting, the target field is default */
4176 meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
4177 bctl->meta.target : fs_info->avail_metadata_alloc_bits;
4178 data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
4179 bctl->data.target : fs_info->avail_data_alloc_bits;
4180 } while (read_seqretry(&fs_info->profiles_lock, seq));
4181
4182 if (reducing_integrity) {
4183 if (bctl->flags & BTRFS_BALANCE_FORCE) {
4184 btrfs_info(fs_info,
4185 "balance: force reducing metadata integrity");
4186 } else {
4187 btrfs_err(fs_info,
4188 "balance: reduces metadata integrity, use --force if you want this");
4189 ret = -EINVAL;
4190 goto out;
4191 }
4192 }
4193
4194 if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
4195 btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
4196 btrfs_warn(fs_info,
4197 "balance: metadata profile %s has lower redundancy than data profile %s",
4198 btrfs_bg_type_to_raid_name(meta_target),
4199 btrfs_bg_type_to_raid_name(data_target));
4200 }
4201
4202 if (fs_info->send_in_progress) {
4203 btrfs_warn_rl(fs_info,
4204 "cannot run balance while send operations are in progress (%d in progress)",
4205 fs_info->send_in_progress);
4206 ret = -EAGAIN;
4207 goto out;
4208 }
4209
4210 ret = insert_balance_item(fs_info, bctl);
4211 if (ret && ret != -EEXIST)
4212 goto out;
4213
4214 if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
4215 BUG_ON(ret == -EEXIST);
4216 BUG_ON(fs_info->balance_ctl);
4217 spin_lock(&fs_info->balance_lock);
4218 fs_info->balance_ctl = bctl;
4219 spin_unlock(&fs_info->balance_lock);
4220 } else {
4221 BUG_ON(ret != -EEXIST);
4222 spin_lock(&fs_info->balance_lock);
4223 update_balance_args(bctl);
4224 spin_unlock(&fs_info->balance_lock);
4225 }
4226
4227 ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4228 set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
4229 describe_balance_start_or_resume(fs_info);
4230 mutex_unlock(&fs_info->balance_mutex);
4231
4232 ret = __btrfs_balance(fs_info);
4233
4234 mutex_lock(&fs_info->balance_mutex);
4235 if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
4236 btrfs_info(fs_info, "balance: paused");
4237 else if (ret == -ECANCELED && atomic_read(&fs_info->balance_cancel_req))
4238 btrfs_info(fs_info, "balance: canceled");
4239 else
4240 btrfs_info(fs_info, "balance: ended with status: %d", ret);
4241
4242 clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
4243
4244 if (bargs) {
4245 memset(bargs, 0, sizeof(*bargs));
4246 btrfs_update_ioctl_balance_args(fs_info, bargs);
4247 }
4248
4249 if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
4250 balance_need_close(fs_info)) {
4251 reset_balance_state(fs_info);
4252 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
4253 }
4254
4255 wake_up(&fs_info->balance_wait_q);
4256
4257 return ret;
4258 out:
4259 if (bctl->flags & BTRFS_BALANCE_RESUME)
4260 reset_balance_state(fs_info);
4261 else
4262 kfree(bctl);
4263 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
4264
4265 return ret;
4266 }
4267
4268 static int balance_kthread(void *data)
4269 {
4270 struct btrfs_fs_info *fs_info = data;
4271 int ret = 0;
4272
4273 mutex_lock(&fs_info->balance_mutex);
4274 if (fs_info->balance_ctl)
4275 ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
4276 mutex_unlock(&fs_info->balance_mutex);
4277
4278 return ret;
4279 }
4280
4281 int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
4282 {
4283 struct task_struct *tsk;
4284
4285 mutex_lock(&fs_info->balance_mutex);
4286 if (!fs_info->balance_ctl) {
4287 mutex_unlock(&fs_info->balance_mutex);
4288 return 0;
4289 }
4290 mutex_unlock(&fs_info->balance_mutex);
4291
4292 if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
4293 btrfs_info(fs_info, "balance: resume skipped");
4294 return 0;
4295 }
4296
4297 /*
4298 * A ro->rw remount sequence should continue with the paused balance
4299 * regardless of who pauses it, system or the user as of now, so set
4300 * the resume flag.
4301 */
4302 spin_lock(&fs_info->balance_lock);
4303 fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
4304 spin_unlock(&fs_info->balance_lock);
4305
4306 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
4307 return PTR_ERR_OR_ZERO(tsk);
4308 }
4309
4310 int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
4311 {
4312 struct btrfs_balance_control *bctl;
4313 struct btrfs_balance_item *item;
4314 struct btrfs_disk_balance_args disk_bargs;
4315 struct btrfs_path *path;
4316 struct extent_buffer *leaf;
4317 struct btrfs_key key;
4318 int ret;
4319
4320 path = btrfs_alloc_path();
4321 if (!path)
4322 return -ENOMEM;
4323
4324 key.objectid = BTRFS_BALANCE_OBJECTID;
4325 key.type = BTRFS_TEMPORARY_ITEM_KEY;
4326 key.offset = 0;
4327
4328 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
4329 if (ret < 0)
4330 goto out;
4331 if (ret > 0) {
4332 ret = 0;
4333 goto out;
4334 }
4335
4336 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
4337 if (!bctl) {
4338 ret = -ENOMEM;
4339 goto out;
4340 }
4341
4342 leaf = path->nodes[0];
4343 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
4344
4345 bctl->flags = btrfs_balance_flags(leaf, item);
4346 bctl->flags |= BTRFS_BALANCE_RESUME;
4347
4348 btrfs_balance_data(leaf, item, &disk_bargs);
4349 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
4350 btrfs_balance_meta(leaf, item, &disk_bargs);
4351 btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
4352 btrfs_balance_sys(leaf, item, &disk_bargs);
4353 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
4354
4355 /*
4356 * This should never happen, as the paused balance state is recovered
4357 * during mount without any chance of other exclusive ops to collide.
4358 *
4359 * This means the exclusive op status is "in progress" and indicating
4360 * that balance is paused. All exclusive operations are canceled on
4361 * a running system, so this is mostly a sanity check; if the bit is
4362 * somehow already set we only warn and ask the user to resume the
4363 * balance manually instead of failing the mount.
4364 */
4365 if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
4366 btrfs_warn(fs_info,
4367 "balance: cannot set exclusive op status, resume manually");
4368
4369 mutex_lock(&fs_info->balance_mutex);
4370 BUG_ON(fs_info->balance_ctl);
4371 spin_lock(&fs_info->balance_lock);
4372 fs_info->balance_ctl = bctl;
4373 spin_unlock(&fs_info->balance_lock);
4374 mutex_unlock(&fs_info->balance_mutex);
4375 out:
4376 btrfs_free_path(path);
4377 return ret;
4378 }
4379
4380 int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
4381 {
4382 int ret = 0;
4383
4384 mutex_lock(&fs_info->balance_mutex);
4385 if (!fs_info->balance_ctl) {
4386 mutex_unlock(&fs_info->balance_mutex);
4387 return -ENOTCONN;
4388 }
4389
4390 if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
4391 atomic_inc(&fs_info->balance_pause_req);
4392 mutex_unlock(&fs_info->balance_mutex);
4393
4394 wait_event(fs_info->balance_wait_q,
4395 !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4396
4397 mutex_lock(&fs_info->balance_mutex);
4398
4399 BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4400 atomic_dec(&fs_info->balance_pause_req);
4401 } else {
4402 ret = -ENOTCONN;
4403 }
4404
4405 mutex_unlock(&fs_info->balance_mutex);
4406 return ret;
4407 }
4408
4409 int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
4410 {
4411 mutex_lock(&fs_info->balance_mutex);
4412 if (!fs_info->balance_ctl) {
4413 mutex_unlock(&fs_info->balance_mutex);
4414 return -ENOTCONN;
4415 }
4416
4417 /*
4418 * A paused balance with the item stored on disk can be resumed at
4419 * mount time if the mount is read-write. Otherwise it's still paused
4420 * and we must not allow cancelling as it deletes the item.
4421 */
4422 if (sb_rdonly(fs_info->sb)) {
4423 mutex_unlock(&fs_info->balance_mutex);
4424 return -EROFS;
4425 }
4426
4427 atomic_inc(&fs_info->balance_cancel_req);
4428
4429 /*
4430 * If balance is running, wait for it to stop; otherwise clean up here.
4431 */
4432 if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
4433 mutex_unlock(&fs_info->balance_mutex);
4434 wait_event(fs_info->balance_wait_q,
4435 !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4436 mutex_lock(&fs_info->balance_mutex);
4437 } else {
4438 mutex_unlock(&fs_info->balance_mutex);
4439 /*
4440 * Lock released to allow other waiters to continue, we'll
4441 * reexamine the status again.
4442 */
4443 mutex_lock(&fs_info->balance_mutex);
4444
4445 if (fs_info->balance_ctl) {
4446 reset_balance_state(fs_info);
4447 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
4448 btrfs_info(fs_info, "balance: canceled");
4449 }
4450 }
4451
4452 BUG_ON(fs_info->balance_ctl ||
4453 test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4454 atomic_dec(&fs_info->balance_cancel_req);
4455 mutex_unlock(&fs_info->balance_mutex);
4456 return 0;
4457 }
4458
4459 static int btrfs_uuid_scan_kthread(void *data)
4460 {
4461 struct btrfs_fs_info *fs_info = data;
4462 struct btrfs_root *root = fs_info->tree_root;
4463 struct btrfs_key key;
4464 struct btrfs_path *path = NULL;
4465 int ret = 0;
4466 struct extent_buffer *eb;
4467 int slot;
4468 struct btrfs_root_item root_item;
4469 u32 item_size;
4470 struct btrfs_trans_handle *trans = NULL;
4471
4472 path = btrfs_alloc_path();
4473 if (!path) {
4474 ret = -ENOMEM;
4475 goto out;
4476 }
4477
4478 key.objectid = 0;
4479 key.type = BTRFS_ROOT_ITEM_KEY;
4480 key.offset = 0;
4481
4482 while (1) {
4483 ret = btrfs_search_forward(root, &key, path,
4484 BTRFS_OLDEST_GENERATION);
4485 if (ret) {
4486 if (ret > 0)
4487 ret = 0;
4488 break;
4489 }
4490
4491 if (key.type != BTRFS_ROOT_ITEM_KEY ||
4492 (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
4493 key.objectid != BTRFS_FS_TREE_OBJECTID) ||
4494 key.objectid > BTRFS_LAST_FREE_OBJECTID)
4495 goto skip;
4496
4497 eb = path->nodes[0];
4498 slot = path->slots[0];
4499 item_size = btrfs_item_size_nr(eb, slot);
4500 if (item_size < sizeof(root_item))
4501 goto skip;
4502
4503 read_extent_buffer(eb, &root_item,
4504 btrfs_item_ptr_offset(eb, slot),
4505 (int)sizeof(root_item));
4506 if (btrfs_root_refs(&root_item) == 0)
4507 goto skip;
4508
4509 if (!btrfs_is_empty_uuid(root_item.uuid) ||
4510 !btrfs_is_empty_uuid(root_item.received_uuid)) {
4511 if (trans)
4512 goto update_tree;
4513
4514 btrfs_release_path(path);
4515
4516 /*
4517  * 1 - subvol uuid item, 1 - received_subvol uuid item
4518  */
4519 trans = btrfs_start_transaction(fs_info->uuid_root, 2);
4520 if (IS_ERR(trans)) {
4521 ret = PTR_ERR(trans);
4522 break;
4523 }
4524 continue;
4525 } else {
4526 goto skip;
4527 }
4528 update_tree:
4529 if (!btrfs_is_empty_uuid(root_item.uuid)) {
4530 ret = btrfs_uuid_tree_add(trans, root_item.uuid,
4531 BTRFS_UUID_KEY_SUBVOL,
4532 key.objectid);
4533 if (ret < 0) {
4534 btrfs_warn(fs_info, "uuid_tree_add failed %d",
4535 ret);
4536 break;
4537 }
4538 }
4539
4540 if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
4541 ret = btrfs_uuid_tree_add(trans,
4542 root_item.received_uuid,
4543 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4544 key.objectid);
4545 if (ret < 0) {
4546 btrfs_warn(fs_info, "uuid_tree_add failed %d",
4547 ret);
4548 break;
4549 }
4550 }
4551
4552 skip:
4553 if (trans) {
4554 ret = btrfs_end_transaction(trans);
4555 trans = NULL;
4556 if (ret)
4557 break;
4558 }
4559
4560 btrfs_release_path(path);
4561 if (key.offset < (u64)-1) {
4562 key.offset++;
4563 } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
4564 key.offset = 0;
4565 key.type = BTRFS_ROOT_ITEM_KEY;
4566 } else if (key.objectid < (u64)-1) {
4567 key.offset = 0;
4568 key.type = BTRFS_ROOT_ITEM_KEY;
4569 key.objectid++;
4570 } else {
4571 break;
4572 }
4573 cond_resched();
4574 }
4575
4576 out:
4577 btrfs_free_path(path);
4578 if (trans && !IS_ERR(trans))
4579 btrfs_end_transaction(trans);
4580 if (ret)
4581 btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
4582 else
4583 set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
4584 up(&fs_info->uuid_tree_rescan_sem);
4585 return 0;
4586 }
4587
4588 /*
4589  * Callback for btrfs_uuid_tree_iterate(). Returns:
4590  * 0    check succeeded, the entry is not outdated
4591  * < 0  if an error occurred
4592  * > 0  if the check failed, which means the caller shall remove the
4593  *      entry
4594  */
4595 static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
4596 u8 *uuid, u8 type, u64 subid)
4597 {
4598 struct btrfs_key key;
4599 int ret = 0;
4600 struct btrfs_root *subvol_root;
4601
4602 if (type != BTRFS_UUID_KEY_SUBVOL &&
4603 type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
4604 goto out;
4605
4606 key.objectid = subid;
4607 key.type = BTRFS_ROOT_ITEM_KEY;
4608 key.offset = (u64)-1;
4609 subvol_root = btrfs_read_fs_root_no_name(fs_info, &key);
4610 if (IS_ERR(subvol_root)) {
4611 ret = PTR_ERR(subvol_root);
4612 if (ret == -ENOENT)
4613 ret = 1;
4614 goto out;
4615 }
4616
4617 switch (type) {
4618 case BTRFS_UUID_KEY_SUBVOL:
4619 if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE))
4620 ret = 1;
4621 break;
4622 case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
4623 if (memcmp(uuid, subvol_root->root_item.received_uuid,
4624 BTRFS_UUID_SIZE))
4625 ret = 1;
4626 break;
4627 }
4628
4629 out:
4630 return ret;
4631 }
4632
4633 static int btrfs_uuid_rescan_kthread(void *data)
4634 {
4635 struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data;
4636 int ret;
4637
4638 /*
4639  * 1st step is to iterate through the existing UUID tree and
4640  * to delete all entries that contain outdated data.
4641  * 2nd step is to add all missing entries to the UUID tree.
4642  */
4643 ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
4644 if (ret < 0) {
4645 btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret);
4646 up(&fs_info->uuid_tree_rescan_sem);
4647 return ret;
4648 }
4649 return btrfs_uuid_scan_kthread(data);
4650 }
4651
4652 int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
4653 {
4654 struct btrfs_trans_handle *trans;
4655 struct btrfs_root *tree_root = fs_info->tree_root;
4656 struct btrfs_root *uuid_root;
4657 struct task_struct *task;
4658 int ret;
4659
4660 /*
4661  * 1 - root node
4662  * 1 - root item
4663  */
4664 trans = btrfs_start_transaction(tree_root, 2);
4665 if (IS_ERR(trans))
4666 return PTR_ERR(trans);
4667
4668 uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID);
4669 if (IS_ERR(uuid_root)) {
4670 ret = PTR_ERR(uuid_root);
4671 btrfs_abort_transaction(trans, ret);
4672 btrfs_end_transaction(trans);
4673 return ret;
4674 }
4675
4676 fs_info->uuid_root = uuid_root;
4677
4678 ret = btrfs_commit_transaction(trans);
4679 if (ret)
4680 return ret;
4681
4682 down(&fs_info->uuid_tree_rescan_sem);
4683 task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
4684 if (IS_ERR(task)) {
4685
4686 btrfs_warn(fs_info, "failed to start uuid_scan task");
4687 up(&fs_info->uuid_tree_rescan_sem);
4688 return PTR_ERR(task);
4689 }
4690
4691 return 0;
4692 }
4693
4694 int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
4695 {
4696 struct task_struct *task;
4697
4698 down(&fs_info->uuid_tree_rescan_sem);
4699 task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
4700 if (IS_ERR(task)) {
4701
4702 btrfs_warn(fs_info, "failed to start uuid_rescan task");
4703 up(&fs_info->uuid_tree_rescan_sem);
4704 return PTR_ERR(task);
4705 }
4706
4707 return 0;
4708 }
4709
4710 /*
4711  * Shrinking a device means finding all of the device extents past
4712  * the new size, and then following the back refs to the chunks.
4713  * The chunk relocation code actually frees the device extent.
4714  */
4715 int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
4716 {
4717 struct btrfs_fs_info *fs_info = device->fs_info;
4718 struct btrfs_root *root = fs_info->dev_root;
4719 struct btrfs_trans_handle *trans;
4720 struct btrfs_dev_extent *dev_extent = NULL;
4721 struct btrfs_path *path;
4722 u64 length;
4723 u64 chunk_offset;
4724 int ret;
4725 int slot;
4726 int failed = 0;
4727 bool retried = false;
4728 struct extent_buffer *l;
4729 struct btrfs_key key;
4730 struct btrfs_super_block *super_copy = fs_info->super_copy;
4731 u64 old_total = btrfs_super_total_bytes(super_copy);
4732 u64 old_size = btrfs_device_get_total_bytes(device);
4733 u64 diff;
4734 u64 start;
4735
4736 new_size = round_down(new_size, fs_info->sectorsize);
4737 start = new_size;
4738 diff = round_down(old_size - new_size, fs_info->sectorsize);
4739
4740 if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
4741 return -EINVAL;
4742
4743 path = btrfs_alloc_path();
4744 if (!path)
4745 return -ENOMEM;
4746
4747 path->reada = READA_BACK;
4748
4749 trans = btrfs_start_transaction(root, 0);
4750 if (IS_ERR(trans)) {
4751 btrfs_free_path(path);
4752 return PTR_ERR(trans);
4753 }
4754
4755 mutex_lock(&fs_info->chunk_mutex);
4756
4757 btrfs_device_set_total_bytes(device, new_size);
4758 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
4759 device->fs_devices->total_rw_bytes -= diff;
4760 atomic64_sub(diff, &fs_info->free_chunk_space);
4761 }
4762
4763 /*
4764  * Once the device's size has been set to the new size, ensure all
4765  * in-memory chunks are synced to disk so that the loop below sees
4766  * them and relocates them accordingly.
4767  */
4768 if (contains_pending_extent(device, &start, diff)) {
4769 mutex_unlock(&fs_info->chunk_mutex);
4770 ret = btrfs_commit_transaction(trans);
4771 if (ret)
4772 goto done;
4773 } else {
4774 mutex_unlock(&fs_info->chunk_mutex);
4775 btrfs_end_transaction(trans);
4776 }
4777
4778 again:
4779 key.objectid = device->devid;
4780 key.offset = (u64)-1;
4781 key.type = BTRFS_DEV_EXTENT_KEY;
4782
4783 do {
4784 mutex_lock(&fs_info->delete_unused_bgs_mutex);
4785 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4786 if (ret < 0) {
4787 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4788 goto done;
4789 }
4790
4791 ret = btrfs_previous_item(root, path, 0, key.type);
4792 if (ret)
4793 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4794 if (ret < 0)
4795 goto done;
4796 if (ret) {
4797 ret = 0;
4798 btrfs_release_path(path);
4799 break;
4800 }
4801
4802 l = path->nodes[0];
4803 slot = path->slots[0];
4804 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
4805
4806 if (key.objectid != device->devid) {
4807 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4808 btrfs_release_path(path);
4809 break;
4810 }
4811
4812 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
4813 length = btrfs_dev_extent_length(l, dev_extent);
4814
4815 if (key.offset + length <= new_size) {
4816 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4817 btrfs_release_path(path);
4818 break;
4819 }
4820
4821 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
4822 btrfs_release_path(path);
4823
4824 /*
4825  * We may be relocating the only data chunk we have,
4826  * which could potentially end up with losing data's
4827  * raid profile, so lets allocate an empty one in
4828  * advance.
4829  */
4830 ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset);
4831 if (ret < 0) {
4832 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4833 goto done;
4834 }
4835
4836 ret = btrfs_relocate_chunk(fs_info, chunk_offset);
4837 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4838 if (ret == -ENOSPC) {
4839 failed++;
4840 } else if (ret) {
4841 if (ret == -ETXTBSY) {
4842 btrfs_warn(fs_info,
4843 "could not shrink block group %llu due to active swapfile",
4844 chunk_offset);
4845 }
4846 goto done;
4847 }
4848 } while (key.offset-- > 0);
4849
4850 if (failed && !retried) {
4851 failed = 0;
4852 retried = true;
4853 goto again;
4854 } else if (failed && retried) {
4855 ret = -ENOSPC;
4856 goto done;
4857 }
4858
4859 /* Shrinking succeeded, else we would be at "done". */
4860 trans = btrfs_start_transaction(root, 0);
4861 if (IS_ERR(trans)) {
4862 ret = PTR_ERR(trans);
4863 goto done;
4864 }
4865
4866 mutex_lock(&fs_info->chunk_mutex);
4867 btrfs_device_set_disk_total_bytes(device, new_size);
4868 if (list_empty(&device->post_commit_list))
4869 list_add_tail(&device->post_commit_list,
4870 &trans->transaction->dev_update_list);
4871
4872 WARN_ON(diff > old_total);
4873 btrfs_set_super_total_bytes(super_copy,
4874 round_down(old_total - diff, fs_info->sectorsize));
4875 mutex_unlock(&fs_info->chunk_mutex);
4876
4877 /* Now btrfs_update_device() will change the on-disk size. */
4878 ret = btrfs_update_device(trans, device);
4879 if (ret < 0) {
4880 btrfs_abort_transaction(trans, ret);
4881 btrfs_end_transaction(trans);
4882 } else {
4883 ret = btrfs_commit_transaction(trans);
4884 }
4885 done:
4886 btrfs_free_path(path);
4887 if (ret) {
4888 mutex_lock(&fs_info->chunk_mutex);
4889 btrfs_device_set_total_bytes(device, old_size);
4890 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
4891 device->fs_devices->total_rw_bytes += diff;
4892 atomic64_add(diff, &fs_info->free_chunk_space);
4893 mutex_unlock(&fs_info->chunk_mutex);
4894 }
4895 return ret;
4896 }
4897
4898 static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info,
4899 struct btrfs_key *key,
4900 struct btrfs_chunk *chunk, int item_size)
4901 {
4902 struct btrfs_super_block *super_copy = fs_info->super_copy;
4903 struct btrfs_disk_key disk_key;
4904 u32 array_size;
4905 u8 *ptr;
4906
4907 mutex_lock(&fs_info->chunk_mutex);
4908 array_size = btrfs_super_sys_array_size(super_copy);
4909 if (array_size + item_size + sizeof(disk_key)
4910 > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
4911 mutex_unlock(&fs_info->chunk_mutex);
4912 return -EFBIG;
4913 }
4914
4915 ptr = super_copy->sys_chunk_array + array_size;
4916 btrfs_cpu_key_to_disk(&disk_key, key);
4917 memcpy(ptr, &disk_key, sizeof(disk_key));
4918 ptr += sizeof(disk_key);
4919 memcpy(ptr, chunk, item_size);
4920 item_size += sizeof(disk_key);
4921 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
4922 mutex_unlock(&fs_info->chunk_mutex);
4923
4924 return 0;
4925 }
4926
4927 /*
4928  * Sort the devices in descending order by max_avail, then total_avail
4929  */
4930 static int btrfs_cmp_device_info(const void *a, const void *b)
4931 {
4932 const struct btrfs_device_info *di_a = a;
4933 const struct btrfs_device_info *di_b = b;
4934
4935 if (di_a->max_avail > di_b->max_avail)
4936 return -1;
4937 if (di_a->max_avail < di_b->max_avail)
4938 return 1;
4939 if (di_a->total_avail > di_b->total_avail)
4940 return -1;
4941 if (di_a->total_avail < di_b->total_avail)
4942 return 1;
4943 return 0;
4944 }
4945
4946 static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
4947 {
4948 if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
4949 return;
4950
4951 btrfs_set_fs_incompat(info, RAID56);
4952 }
4953
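/*
 * Create a chunk of the given @type at logical offset @start: gather the
 * free space available on each writable device, sort the candidates,
 * derive the stripe count and stripe size from the raid profile limits,
 * then insert the new mapping into the extent map tree and create the
 * corresponding block group.
 */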
4954 static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4955 u64 start, u64 type)
4956 {
4957 struct btrfs_fs_info *info = trans->fs_info;
4958 struct btrfs_fs_devices *fs_devices = info->fs_devices;
4959 struct btrfs_device *device;
4960 struct map_lookup *map = NULL;
4961 struct extent_map_tree *em_tree;
4962 struct extent_map *em;
4963 struct btrfs_device_info *devices_info = NULL;
4964 u64 total_avail;
4965 int num_stripes;
4966 int data_stripes;
4967
4968 int sub_stripes;
4969 int dev_stripes;
4970 int devs_max;
4971 int devs_min;
4972 int devs_increment;
4973 int ncopies;
4974 int nparity;
4975
4976 int ret;
4977 u64 max_stripe_size;
4978 u64 max_chunk_size;
4979 u64 stripe_size;
4980 u64 chunk_size;
4981 int ndevs;
4982 int i;
4983 int j;
4984 int index;
4985
4986 BUG_ON(!alloc_profile_is_valid(type, 0));
4987
4988 if (list_empty(&fs_devices->alloc_list)) {
4989 if (btrfs_test_opt(info, ENOSPC_DEBUG))
4990 btrfs_debug(info, "%s: no writable device", __func__);
4991 return -ENOSPC;
4992 }
4993
4994 index = btrfs_bg_flags_to_raid_index(type);
4995
4996 sub_stripes = btrfs_raid_array[index].sub_stripes;
4997 dev_stripes = btrfs_raid_array[index].dev_stripes;
4998 devs_max = btrfs_raid_array[index].devs_max;
4999 if (!devs_max)
5000 devs_max = BTRFS_MAX_DEVS(info);
5001 devs_min = btrfs_raid_array[index].devs_min;
5002 devs_increment = btrfs_raid_array[index].devs_increment;
5003 ncopies = btrfs_raid_array[index].ncopies;
5004 nparity = btrfs_raid_array[index].nparity;
5005
5006 if (type & BTRFS_BLOCK_GROUP_DATA) {
5007 max_stripe_size = SZ_1G;
5008 max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
5009 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
5010 /* For larger filesystems, use larger metadata chunks */
5011 if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
5012 max_stripe_size = SZ_1G;
5013 else
5014 max_stripe_size = SZ_256M;
5015 max_chunk_size = max_stripe_size;
5016 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
5017 max_stripe_size = SZ_32M;
5018 max_chunk_size = 2 * max_stripe_size;
5019 devs_max = min_t(int, devs_max, BTRFS_MAX_DEVS_SYS_CHUNK);
5020 } else {
5021 btrfs_err(info, "invalid chunk type 0x%llx requested",
5022 type);
5023 BUG();
5024 }
5025
5026 /* We don't want a chunk larger than 10% of writable space */
5027 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
5028 max_chunk_size);
5029
5030 devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
5031 GFP_NOFS);
5032 if (!devices_info)
5033 return -ENOMEM;
5034
5035 /*
5036  * In the first pass through the devices list, we gather information
5037  * about the available holes on each device.
5038  */
5039 ndevs = 0;
5040 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
5041 u64 max_avail;
5042 u64 dev_offset;
5043
5044 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
5045 WARN(1, KERN_ERR
5046 "BTRFS: read-only device in alloc_list\n");
5047 continue;
5048 }
5049
5050 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
5051 &device->dev_state) ||
5052 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
5053 continue;
5054
5055 if (device->total_bytes > device->bytes_used)
5056 total_avail = device->total_bytes - device->bytes_used;
5057 else
5058 total_avail = 0;
5059
5060 /* If there is no space on this device, skip it. */
5061 if (total_avail == 0)
5062 continue;
5063
5064 ret = find_free_dev_extent(device,
5065 max_stripe_size * dev_stripes,
5066 &dev_offset, &max_avail);
5067 if (ret && ret != -ENOSPC)
5068 goto error;
5069
5070 if (ret == 0)
5071 max_avail = max_stripe_size * dev_stripes;
5072
5073 if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) {
5074 if (btrfs_test_opt(info, ENOSPC_DEBUG))
5075 btrfs_debug(info,
5076 "%s: devid %llu has no free space, have=%llu want=%u",
5077 __func__, device->devid, max_avail,
5078 BTRFS_STRIPE_LEN * dev_stripes);
5079 continue;
5080 }
5081
5082 if (ndevs == fs_devices->rw_devices) {
5083 WARN(1, "%s: found more than %llu devices\n",
5084 __func__, fs_devices->rw_devices);
5085 break;
5086 }
5087 devices_info[ndevs].dev_offset = dev_offset;
5088 devices_info[ndevs].max_avail = max_avail;
5089 devices_info[ndevs].total_avail = total_avail;
5090 devices_info[ndevs].dev = device;
5091 ++ndevs;
5092 }
5093
5094 /*
5095  * Now sort the devices by hole size / available space
5096  */
5097 sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
5098 btrfs_cmp_device_info, NULL);
5099
5100 /* Round down to number of usable stripes */
5101 ndevs = round_down(ndevs, devs_increment);
5102
5103 if (ndevs < devs_min) {
5104 ret = -ENOSPC;
5105 if (btrfs_test_opt(info, ENOSPC_DEBUG)) {
5106 btrfs_debug(info,
5107 "%s: not enough devices with free space: have=%d minimum required=%d",
5108 __func__, ndevs, devs_min);
5109 }
5110 goto error;
5111 }
5112
5113 ndevs = min(ndevs, devs_max);
5114
5115
5116 /*
5117  * The primary goal is to maximize the number of stripes, so use as
5118  * many devices as possible, even if the stripes are not maximum sized.
5119  * The DUP profile stores more than one stripe per device, and
5120  * max_avail is the total size, so we have to adjust.
5121  */
5122 stripe_size = div_u64(devices_info[ndevs - 1].max_avail, dev_stripes);
5123 num_stripes = ndevs * dev_stripes;
5124
5125 /*
5126  * This will have to be fixed for RAID1 and RAID10 over
5127  * more drives
5128  */
5129 data_stripes = (num_stripes - nparity) / ncopies;
5130
5131
5132 /*
5133  * Use the number of data stripes to figure out how big this chunk is
5134  * really going to be: it covers stripe_size bytes on each of the
5135  * data stripes, i.e. stripe_size * data_stripes logical bytes.
5136  */
5137 if (stripe_size * data_stripes > max_chunk_size) {
5138 /*
5139  * Reduce stripe_size, round it up to a 16MB boundary again and
5140  * then use it, unless it ends up being even bigger than the
5141  * previous value we had already.
5142  */
5143 stripe_size = min(round_up(div_u64(max_chunk_size,
5144 data_stripes), SZ_16M),
5145 stripe_size);
5146 }
5147
5148 /* Align to BTRFS_STRIPE_LEN */
5149 stripe_size = round_down(stripe_size, BTRFS_STRIPE_LEN);
5150
5151 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
5152 if (!map) {
5153 ret = -ENOMEM;
5154 goto error;
5155 }
5156 map->num_stripes = num_stripes;
5157
5158 for (i = 0; i < ndevs; ++i) {
5159 for (j = 0; j < dev_stripes; ++j) {
5160 int s = i * dev_stripes + j;
5161 map->stripes[s].dev = devices_info[i].dev;
5162 map->stripes[s].physical = devices_info[i].dev_offset +
5163 j * stripe_size;
5164 }
5165 }
5166 map->stripe_len = BTRFS_STRIPE_LEN;
5167 map->io_align = BTRFS_STRIPE_LEN;
5168 map->io_width = BTRFS_STRIPE_LEN;
5169 map->type = type;
5170 map->sub_stripes = sub_stripes;
5171
5172 chunk_size = stripe_size * data_stripes;
5173
5174 trace_btrfs_chunk_alloc(info, map, start, chunk_size);
5175
5176 em = alloc_extent_map();
5177 if (!em) {
5178 kfree(map);
5179 ret = -ENOMEM;
5180 goto error;
5181 }
5182 set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
5183 em->map_lookup = map;
5184 em->start = start;
5185 em->len = chunk_size;
5186 em->block_start = 0;
5187 em->block_len = em->len;
5188 em->orig_block_len = stripe_size;
5189
5190 em_tree = &info->mapping_tree;
5191 write_lock(&em_tree->lock);
5192 ret = add_extent_mapping(em_tree, em, 0);
5193 if (ret) {
5194 write_unlock(&em_tree->lock);
5195 free_extent_map(em);
5196 goto error;
5197 }
5198 write_unlock(&em_tree->lock);
5199
5200 ret = btrfs_make_block_group(trans, 0, type, start, chunk_size);
5201 if (ret)
5202 goto error_del_extent;
5203
5204 for (i = 0; i < map->num_stripes; i++) {
5205 struct btrfs_device *dev = map->stripes[i].dev;
5206
5207 btrfs_device_set_bytes_used(dev, dev->bytes_used + stripe_size);
5208 if (list_empty(&dev->post_commit_list))
5209 list_add_tail(&dev->post_commit_list,
5210 &trans->transaction->dev_update_list);
5211 }
5212
5213 atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space);
5214
5215 free_extent_map(em);
5216 check_raid56_incompat_flag(info, type);
5217
5218 kfree(devices_info);
5219 return 0;
5220
5221 error_del_extent:
5222 write_lock(&em_tree->lock);
5223 remove_extent_mapping(em_tree, em);
5224 write_unlock(&em_tree->lock);
5225
5226 /* One for our allocation */
5227 free_extent_map(em);
5228 /* One for the tree reference */
5229 free_extent_map(em);
5230 error:
5231 kfree(devices_info);
5232 return ret;
5233 }
5234
5235 int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
5236 u64 chunk_offset, u64 chunk_size)
5237 {
5238 struct btrfs_fs_info *fs_info = trans->fs_info;
5239 struct btrfs_root *extent_root = fs_info->extent_root;
5240 struct btrfs_root *chunk_root = fs_info->chunk_root;
5241 struct btrfs_key key;
5242 struct btrfs_device *device;
5243 struct btrfs_chunk *chunk;
5244 struct btrfs_stripe *stripe;
5245 struct extent_map *em;
5246 struct map_lookup *map;
5247 size_t item_size;
5248 u64 dev_offset;
5249 u64 stripe_size;
5250 int i = 0;
5251 int ret = 0;
5252
5253 em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
5254 if (IS_ERR(em))
5255 return PTR_ERR(em);
5256
5257 map = em->map_lookup;
5258 item_size = btrfs_chunk_item_size(map->num_stripes);
5259 stripe_size = em->orig_block_len;
5260
5261 chunk = kzalloc(item_size, GFP_NOFS);
5262 if (!chunk) {
5263 ret = -ENOMEM;
5264 goto out;
5265 }
5266
5267 /*
5268  * Take the device list mutex to prevent races with the final phase of
5269  * a device replace operation that replaces the device object associated
5270  * with the map's stripes, because the device object's id can change
5271  * at any time during that final phase of the device replace operation
5272  * (dev-replace.c:btrfs_dev_replace_finishing()).
5273  */
5274 mutex_lock(&fs_info->fs_devices->device_list_mutex);
5275 for (i = 0; i < map->num_stripes; i++) {
5276 device = map->stripes[i].dev;
5277 dev_offset = map->stripes[i].physical;
5278
5279 ret = btrfs_update_device(trans, device);
5280 if (ret)
5281 break;
5282 ret = btrfs_alloc_dev_extent(trans, device, chunk_offset,
5283 dev_offset, stripe_size);
5284 if (ret)
5285 break;
5286 }
5287 if (ret) {
5288 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5289 goto out;
5290 }
5291
5292 stripe = &chunk->stripe;
5293 for (i = 0; i < map->num_stripes; i++) {
5294 device = map->stripes[i].dev;
5295 dev_offset = map->stripes[i].physical;
5296
5297 btrfs_set_stack_stripe_devid(stripe, device->devid);
5298 btrfs_set_stack_stripe_offset(stripe, dev_offset);
5299 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
5300 stripe++;
5301 }
5302 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5303
5304 btrfs_set_stack_chunk_length(chunk, chunk_size);
5305 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
5306 btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
5307 btrfs_set_stack_chunk_type(chunk, map->type);
5308 btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
5309 btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
5310 btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
5311 btrfs_set_stack_chunk_sector_size(chunk, fs_info->sectorsize);
5312 btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
5313
5314 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
5315 key.type = BTRFS_CHUNK_ITEM_KEY;
5316 key.offset = chunk_offset;
5317
5318 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
5319 if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
5320 /*
5321  * TODO: Cleanup of inserted chunk root in case of
5322  * failure.
5323  */
5324 ret = btrfs_add_system_chunk(fs_info, &key, chunk, item_size);
5325 }
5326
5327 out:
5328 kfree(chunk);
5329 free_extent_map(em);
5330 return ret;
5331 }
5332
5333 /*
5334  * Chunk allocation falls into two parts. The first part does work
5335  * that makes the new allocated chunk usable, but does not do any operation
5336  * that modifies the chunk tree. The second part does the work that
5337  * requires modifying the chunk tree. This division is important for the
5338  * bootstrap process of adding storage to a seed btrfs.
5339  */
5340 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type)
5341 {
5342 u64 chunk_offset;
5343
5344 lockdep_assert_held(&trans->fs_info->chunk_mutex);
5345 chunk_offset = find_next_chunk(trans->fs_info);
5346 return __btrfs_alloc_chunk(trans, chunk_offset, type);
5347 }
5348
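/*
 * Allocate the initial metadata and system chunks for a filesystem that
 * just gained its first writable device, using the current allocation
 * profiles.
 */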
5349 static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
5350 {
5351 struct btrfs_fs_info *fs_info = trans->fs_info;
5352 u64 chunk_offset;
5353 u64 sys_chunk_offset;
5354 u64 alloc_profile;
5355 int ret;
5356
5357 chunk_offset = find_next_chunk(fs_info);
5358 alloc_profile = btrfs_metadata_alloc_profile(fs_info);
5359 ret = __btrfs_alloc_chunk(trans, chunk_offset, alloc_profile);
5360 if (ret)
5361 return ret;
5362
5363 sys_chunk_offset = find_next_chunk(fs_info);
5364 alloc_profile = btrfs_system_alloc_profile(fs_info);
5365 ret = __btrfs_alloc_chunk(trans, sys_chunk_offset, alloc_profile);
5366 return ret;
5367 }
5368
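/* Number of device failures the chunk's raid profile can tolerate. */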
5369 static inline int btrfs_chunk_max_errors(struct map_lookup *map)
5370 {
5371 const int index = btrfs_bg_flags_to_raid_index(map->type);
5372
5373 return btrfs_raid_array[index].tolerated_failures;
5374 }
5375
5376 int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
5377 {
5378 struct extent_map *em;
5379 struct map_lookup *map;
5380 int readonly = 0;
5381 int miss_ndevs = 0;
5382 int i;
5383
5384 em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
5385 if (IS_ERR(em))
5386 return 1;
5387
5388 map = em->map_lookup;
5389 for (i = 0; i < map->num_stripes; i++) {
5390 if (test_bit(BTRFS_DEV_STATE_MISSING,
5391 &map->stripes[i].dev->dev_state)) {
5392 miss_ndevs++;
5393 continue;
5394 }
5395 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
5396 &map->stripes[i].dev->dev_state)) {
5397 readonly = 1;
5398 goto end;
5399 }
5400 }
5401
5402 /*
5403  * If the number of missing devices is larger than max errors, we
5404  * cannot write the data into that chunk successfully, so set it
5405  * readonly.
5406  */
5407 if (miss_ndevs > btrfs_chunk_max_errors(map))
5408 readonly = 1;
5409 end:
5410 free_extent_map(em);
5411 return readonly;
5412 }
5413
5414 void btrfs_mapping_tree_free(struct extent_map_tree *tree)
5415 {
5416 struct extent_map *em;
5417
5418 while (1) {
5419 write_lock(&tree->lock);
5420 em = lookup_extent_mapping(tree, 0, (u64)-1);
5421 if (em)
5422 remove_extent_mapping(tree, em);
5423 write_unlock(&tree->lock);
5424 if (!em)
5425 break;
5426 /* Once for us */
5427 free_extent_map(em);
5428 /* Once for the tree */
5429 free_extent_map(em);
5430 }
5431 }
5432
5433 int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5434 {
5435 struct extent_map *em;
5436 struct map_lookup *map;
5437 int ret;
5438
5439 em = btrfs_get_chunk_map(fs_info, logical, len);
5440 if (IS_ERR(em))
5441 /*
5442  * We could return errors for these cases, but that could get
5443  * ugly and we'd probably do the same thing which is just not do
5444  * anything else and exit, so return 1 so the callers don't try
5445  * to use other copies.
5446  */
5447 return 1;
5448
5449 map = em->map_lookup;
5450 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK))
5451 ret = map->num_stripes;
5452 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5453 ret = map->sub_stripes;
5454 else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
5455 ret = 2;
5456 else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
5457 /*
5458  * There could be two corrupted data stripes, we need
5459  * to loop retry in order to rebuild the correct data.
5460  *
5461  * Fail a stripe at a time on every retry except the
5462  * stripe under reconstruction.
5463  */
5464 ret = map->num_stripes;
5465 else
5466 ret = 1;
5467 free_extent_map(em);
5468
5469 down_read(&fs_info->dev_replace.rwsem);
5470 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
5471 fs_info->dev_replace.tgtdev)
5472 ret++;
5473 up_read(&fs_info->dev_replace.rwsem);
5474
5475 return ret;
5476 }
5477
5478 unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
5479 u64 logical)
5480 {
5481 struct extent_map *em;
5482 struct map_lookup *map;
5483 unsigned long len = fs_info->sectorsize;
5484
5485 em = btrfs_get_chunk_map(fs_info, logical, len);
5486
5487 if (!WARN_ON(IS_ERR(em))) {
5488 map = em->map_lookup;
5489 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5490 len = map->stripe_len * nr_data_stripes(map);
5491 free_extent_map(em);
5492 }
5493 return len;
5494 }
5495
5496 int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5497 {
5498 struct extent_map *em;
5499 struct map_lookup *map;
5500 int ret = 0;
5501
5502 em = btrfs_get_chunk_map(fs_info, logical, len);
5503
5504 if (!WARN_ON(IS_ERR(em))) {
5505 map = em->map_lookup;
5506 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5507 ret = 1;
5508 free_extent_map(em);
5509 }
5510 return ret;
5511 }
5512
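/*
 * Pick a mirror stripe to read from, spreading readers across copies by
 * deriving the preferred mirror from the caller's pid and skipping
 * devices without an open bdev.
 */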
5513 static int find_live_mirror(struct btrfs_fs_info *fs_info,
5514 struct map_lookup *map, int first,
5515 int dev_replace_is_ongoing)
5516 {
5517 int i;
5518 int num_stripes;
5519 int preferred_mirror;
5520 int tolerance;
5521 struct btrfs_device *srcdev;
5522
5523 ASSERT((map->type &
5524 (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)));
5525
5526 if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5527 num_stripes = map->sub_stripes;
5528 else
5529 num_stripes = map->num_stripes;
5530
5531 preferred_mirror = first + current->pid % num_stripes;
5532
5533 if (dev_replace_is_ongoing &&
5534 fs_info->dev_replace.cont_reading_from_srcdev_mode ==
5535 BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
5536 srcdev = fs_info->dev_replace.srcdev;
5537 else
5538 srcdev = NULL;
5539
5540 /*
5541  * Try to avoid the drive that is the source drive for a
5542  * dev-replace procedure, only choose it if no other non-missing
5543  * mirror is available
5544  */
5545 for (tolerance = 0; tolerance < 2; tolerance++) {
5546 if (map->stripes[preferred_mirror].dev->bdev &&
5547 (tolerance || map->stripes[preferred_mirror].dev != srcdev))
5548 return preferred_mirror;
5549 for (i = first; i < first + num_stripes; i++) {
5550 if (map->stripes[i].dev->bdev &&
5551 (tolerance || map->stripes[i].dev != srcdev))
5552 return i;
5553 }
5554 }
5555
5556 /* We couldn't find one that doesn't fail.  Just return something
5557  * and the io error handling code will clean up eventually
5558  */
5559 return preferred_mirror;
5560 }
5561
5562 static inline int parity_smaller(u64 a, u64 b)
5563 {
5564 return a > b;
5565 }
5566
5567 /* Bubble-sort the stripes so the parity stripes (largest raid_map values) end up last */
5568 static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
5569 {
5570 struct btrfs_bio_stripe s;
5571 int i;
5572 u64 l;
5573 int again = 1;
5574
5575 while (again) {
5576 again = 0;
5577 for (i = 0; i < num_stripes - 1; i++) {
5578 if (parity_smaller(bbio->raid_map[i],
5579 bbio->raid_map[i+1])) {
5580 s = bbio->stripes[i];
5581 l = bbio->raid_map[i];
5582 bbio->stripes[i] = bbio->stripes[i+1];
5583 bbio->raid_map[i] = bbio->raid_map[i+1];
5584 bbio->stripes[i+1] = s;
5585 bbio->raid_map[i+1] = l;
5586
5587 again = 1;
5588 }
5589 }
5590 }
5591 }
5592
5593 static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
5594 {
5595 struct btrfs_bio *bbio = kzalloc(
5596 /* The size of btrfs_bio itself */
5597 sizeof(struct btrfs_bio) +
5598 /* Plus the variable array for the stripes */
5599 sizeof(struct btrfs_bio_stripe) * (total_stripes) +
5600 /* Plus the variable array for the tgtdev */
5601 sizeof(int) * (real_stripes) +
5602 /*
5603  * Plus the raid_map, which includes both the tgtdev
5604  * num and the stripes
5605  */
5606 sizeof(u64) * (total_stripes),
5607 GFP_NOFS|__GFP_NOFAIL);
5608
5609 atomic_set(&bbio->error, 0);
5610 refcount_set(&bbio->refs, 1);
5611
5612 return bbio;
5613 }
5614
5615 void btrfs_get_bbio(struct btrfs_bio *bbio)
5616 {
5617 WARN_ON(!refcount_read(&bbio->refs));
5618 refcount_inc(&bbio->refs);
5619 }
5620
5621 void btrfs_put_bbio(struct btrfs_bio *bbio)
5622 {
5623 if (!bbio)
5624 return;
5625 if (refcount_dec_and_test(&bbio->refs))
5626 kfree(bbio);
5627 }
5628
5629 /*
5630  * Map a discard request to the set of physical stripe extents it covers.
5631  * Unlike reads and writes, a discard may span many stripes, so each
5632  * returned stripe carries its own length.
5633  */
5634 static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
5635 u64 logical, u64 length,
5636 struct btrfs_bio **bbio_ret)
5637 {
5638 struct extent_map *em;
5639 struct map_lookup *map;
5640 struct btrfs_bio *bbio;
5641 u64 offset;
5642 u64 stripe_nr;
5643 u64 stripe_nr_end;
5644 u64 stripe_end_offset;
5645 u64 stripe_cnt;
5646 u64 stripe_len;
5647 u64 stripe_offset;
5648 u64 num_stripes;
5649 u32 stripe_index;
5650 u32 factor = 0;
5651 u32 sub_stripes = 0;
5652 u64 stripes_per_dev = 0;
5653 u32 remaining_stripes = 0;
5654 u32 last_stripe = 0;
5655 int ret = 0;
5656 int i;
5657
5658 /* Discard always returns a bbio */
5659 ASSERT(bbio_ret);
5660
5661 em = btrfs_get_chunk_map(fs_info, logical, length);
5662 if (IS_ERR(em))
5663 return PTR_ERR(em);
5664
5665 map = em->map_lookup;
5666
5667 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5668 ret = -EOPNOTSUPP;
5669 goto out;
5670 }
5671
5672 offset = logical - em->start;
5673 length = min_t(u64, em->len - offset, length);
5674
5675 stripe_len = map->stripe_len;
5676 /*
5677  * stripe_nr counts the total number of stripes we have to stride
5678  * to get to this block
5679  */
5680 stripe_nr = div64_u64(offset, stripe_len);
5681
5682 /* stripe_offset is the offset of this block in its stripe */
5683 stripe_offset = offset - stripe_nr * stripe_len;
5684
5685 stripe_nr_end = round_up(offset + length, map->stripe_len);
5686 stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len);
5687 stripe_cnt = stripe_nr_end - stripe_nr;
5688 stripe_end_offset = stripe_nr_end * map->stripe_len -
5689 (offset + length);
5690
5691
5692
5693
5694
5695 num_stripes = 1;
5696 stripe_index = 0;
5697 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
5698 BTRFS_BLOCK_GROUP_RAID10)) {
5699 if (map->type & BTRFS_BLOCK_GROUP_RAID0)
5700 sub_stripes = 1;
5701 else
5702 sub_stripes = map->sub_stripes;
5703
5704 factor = map->num_stripes / sub_stripes;
5705 num_stripes = min_t(u64, map->num_stripes,
5706 sub_stripes * stripe_cnt);
5707 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
5708 stripe_index *= sub_stripes;
5709 stripes_per_dev = div_u64_rem(stripe_cnt, factor,
5710 &remaining_stripes);
5711 div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
5712 last_stripe *= sub_stripes;
5713 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK |
5714 BTRFS_BLOCK_GROUP_DUP)) {
5715 num_stripes = map->num_stripes;
5716 } else {
5717 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
5718 &stripe_index);
5719 }
5720
5721 bbio = alloc_btrfs_bio(num_stripes, 0);
5722 if (!bbio) {
5723 ret = -ENOMEM;
5724 goto out;
5725 }
5726
5727 for (i = 0; i < num_stripes; i++) {
5728 bbio->stripes[i].physical =
5729 map->stripes[stripe_index].physical +
5730 stripe_offset + stripe_nr * map->stripe_len;
5731 bbio->stripes[i].dev = map->stripes[stripe_index].dev;
5732
5733 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
5734 BTRFS_BLOCK_GROUP_RAID10)) {
5735 bbio->stripes[i].length = stripes_per_dev *
5736 map->stripe_len;
5737
5738 if (i / sub_stripes < remaining_stripes)
5739 bbio->stripes[i].length +=
5740 map->stripe_len;
5741
5742
5743
5744
5745
5746
5747
5748
5749
5750 if (i < sub_stripes)
5751 bbio->stripes[i].length -=
5752 stripe_offset;
5753
5754 if (stripe_index >= last_stripe &&
5755 stripe_index <= (last_stripe +
5756 sub_stripes - 1))
5757 bbio->stripes[i].length -=
5758 stripe_end_offset;
5759
5760 if (i == sub_stripes - 1)
5761 stripe_offset = 0;
5762 } else {
5763 bbio->stripes[i].length = length;
5764 }
5765
5766 stripe_index++;
5767 if (stripe_index == map->num_stripes) {
5768 stripe_index = 0;
5769 stripe_nr++;
5770 }
5771 }
5772
5773 *bbio_ret = bbio;
5774 bbio->map_type = map->type;
5775 bbio->num_stripes = num_stripes;
5776 out:
5777 free_extent_map(em);
5778 return ret;
5779 }
5780
5781 /*
5782  * In dev-replace case, for repair case (that's the only case where the mirror
5783  * is selected explicitly when calling btrfs_map_block), blocks left of the
5784  * left cursor can also be read from the target drive.
5785  *
5786  * For REQ_GET_READ_MIRRORS, the target drive is added as the last one to the
5787  * array of stripes.
5788  * For READ, it also needs to be supported using the same mirror number.
5789  *
5790  * If the requested block is not left of the left cursor, EIO is returned. This
5791  * can happen because btrfs_num_copies() returns one more in the dev-replace
5792  * case.
5793  */
5794 static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
5795 u64 logical, u64 length,
5796 u64 srcdev_devid, int *mirror_num,
5797 u64 *physical)
5798 {
5799 struct btrfs_bio *bbio = NULL;
5800 int num_stripes;
5801 int index_srcdev = 0;
5802 int found = 0;
5803 u64 physical_of_found = 0;
5804 int i;
5805 int ret = 0;
5806
5807 ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
5808 logical, &length, &bbio, 0, 0);
5809 if (ret) {
5810 ASSERT(bbio == NULL);
5811 return ret;
5812 }
5813
5814 num_stripes = bbio->num_stripes;
5815 if (*mirror_num > num_stripes) {
5816 /*
5817  * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
5818  * that means that the requested area is not left of the left
5819  * cursor
5820  */
5821 btrfs_put_bbio(bbio);
5822 return -EIO;
5823 }
5824
5825 /*
5826  * Process the rest of the function using the mirror_num of the source
5827  * drive. Therefore look it up first. At the end, patch the device
5828  * pointer to the one of the target drive.
5829  */
5830 for (i = 0; i < num_stripes; i++) {
5831 if (bbio->stripes[i].dev->devid != srcdev_devid)
5832 continue;
5833
5834 /*
5835  * In case of DUP, in order to keep it simple, only add the
5836  * mirror with the lowest physical address
5837  */
5838 if (found &&
5839 physical_of_found <= bbio->stripes[i].physical)
5840 continue;
5841
5842 index_srcdev = i;
5843 found = 1;
5844 physical_of_found = bbio->stripes[i].physical;
5845 }
5846
5847 btrfs_put_bbio(bbio);
5848
5849 ASSERT(found);
5850 if (!found)
5851 return -EIO;
5852
5853 *mirror_num = index_srcdev + 1;
5854 *physical = physical_of_found;
5855 return ret;
5856 }
5857
5858 static void handle_ops_on_dev_replace(enum btrfs_map_op op,
5859 struct btrfs_bio **bbio_ret,
5860 struct btrfs_dev_replace *dev_replace,
5861 int *num_stripes_ret, int *max_errors_ret)
5862 {
5863 struct btrfs_bio *bbio = *bbio_ret;
5864 u64 srcdev_devid = dev_replace->srcdev->devid;
5865 int tgtdev_indexes = 0;
5866 int num_stripes = *num_stripes_ret;
5867 int max_errors = *max_errors_ret;
5868 int i;
5869
5870 if (op == BTRFS_MAP_WRITE) {
5871 int index_where_to_add;
5872
5873 /*
5874  * Duplicate the write operations while the dev replace
5875  * procedure is running. Since the copying of the old disk to
5876  * the new disk takes place at run time while the filesystem is
5877  * mounted writable, the regular write operations to the old
5878  * disk have to be duplicated to go to the new disk as well.
5879  *
5880  * Note that device->missing is handled by the caller, and that
5881  * the write to the old disk is already set up in the stripes
5882  * array.
5883  */
5884 index_where_to_add = num_stripes;
5885 for (i = 0; i < num_stripes; i++) {
5886 if (bbio->stripes[i].dev->devid == srcdev_devid) {
5887 /* Write to new disk, too */
5888 struct btrfs_bio_stripe *new =
5889 bbio->stripes + index_where_to_add;
5890 struct btrfs_bio_stripe *old =
5891 bbio->stripes + i;
5892
5893 new->physical = old->physical;
5894 new->length = old->length;
5895 new->dev = dev_replace->tgtdev;
5896 bbio->tgtdev_map[i] = index_where_to_add;
5897 index_where_to_add++;
5898 max_errors++;
5899 tgtdev_indexes++;
5900 }
5901 }
5902 num_stripes = index_where_to_add;
5903 } else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
5904 int index_srcdev = 0;
5905 int found = 0;
5906 u64 physical_of_found = 0;
5907
5908 /*
5909  * During the dev-replace procedure, the target drive can also
5910  * be used to read data in case it is needed to repair a corrupt
5911  * block elsewhere. This is possible if the requested area is
5912  * left of the left cursor. In this area, the target drive is a
5913  * full copy of the source drive.
5914  */
5915 for (i = 0; i < num_stripes; i++) {
5916 if (bbio->stripes[i].dev->devid == srcdev_devid) {
5917 /*
5918  * In case of DUP, in order to keep it simple, only add
5919  * the mirror with the lowest physical address
5920  */
5921
5922 if (found &&
5923 physical_of_found <=
5924 bbio->stripes[i].physical)
5925 continue;
5926 index_srcdev = i;
5927 found = 1;
5928 physical_of_found = bbio->stripes[i].physical;
5929 }
5930 }
5931 if (found) {
5932 struct btrfs_bio_stripe *tgtdev_stripe =
5933 bbio->stripes + num_stripes;
5934
5935 tgtdev_stripe->physical = physical_of_found;
5936 tgtdev_stripe->length =
5937 bbio->stripes[index_srcdev].length;
5938 tgtdev_stripe->dev = dev_replace->tgtdev;
5939 bbio->tgtdev_map[index_srcdev] = num_stripes;
5940
5941 tgtdev_indexes++;
5942 num_stripes++;
5943 }
5944 }
5945
5946 *num_stripes_ret = num_stripes;
5947 *max_errors_ret = max_errors;
5948 bbio->num_tgtdevs = tgtdev_indexes;
5949 *bbio_ret = bbio;
5950 }
5951
5952 static bool need_full_stripe(enum btrfs_map_op op)
5953 {
5954 return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
5955 }
5956
5957 /*
5958  * Calculate the geometry of a particular (address, len) tuple. This
5959  * information is used to calculate how big a particular bio can get before it
5960  * straddles a stripe.
5961  *
5962  * @fs_info - the filesystem
5963  * @logical - address that we want to figure out the geometry of
5964  * @len     - the length of IO we are going to perform, starting at @logical
5965  * @op      - type of operation - write or read
5966  * @io_geom - pointer used to return values
5967  *
5968  * Returns < 0 in case a chunk for the given logical address cannot be found,
5969  * usually shouldn't happen unless @logical is corrupted, 0 otherwise.
5970  */
5971 int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
5972 u64 logical, u64 len, struct btrfs_io_geometry *io_geom)
5973 {
5974 struct extent_map *em;
5975 struct map_lookup *map;
5976 u64 offset;
5977 u64 stripe_offset;
5978 u64 stripe_nr;
5979 u64 stripe_len;
5980 u64 raid56_full_stripe_start = (u64)-1;
5981 int data_stripes;
5982 int ret = 0;
5983
5984 ASSERT(op != BTRFS_MAP_DISCARD);
5985
5986 em = btrfs_get_chunk_map(fs_info, logical, len);
5987 if (IS_ERR(em))
5988 return PTR_ERR(em);
5989
5990 map = em->map_lookup;
5991
5992 offset = logical - em->start;
5993
5994 stripe_len = map->stripe_len;
5995
5996 stripe_nr = div64_u64(offset, stripe_len);
5997
5998 stripe_offset = stripe_nr * stripe_len;
5999 if (offset < stripe_offset) {
6000 btrfs_crit(fs_info,
6001 "stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
6002 stripe_offset, offset, em->start, logical, stripe_len);
6003 ret = -EINVAL;
6004 goto out;
6005 }
6006
6007 /* stripe_offset is the offset of this block in its stripe */
6008 stripe_offset = offset - stripe_offset;
6009 data_stripes = nr_data_stripes(map);
6010
6011 if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
6012 u64 max_len = stripe_len - stripe_offset;
6013
6014 /*
6015  * In case of RAID56, we need to know the stripe aligned start
6016  */
6017 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
6018 unsigned long full_stripe_len = stripe_len * data_stripes;
6019 raid56_full_stripe_start = offset;
6020
6021 /*
6022  * Allow a write of a full stripe, but make sure we
6023  * don't allow straddling of stripes
6024  */
6025 raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
6026 full_stripe_len);
6027 raid56_full_stripe_start *= full_stripe_len;
6028
6029 /*
6030  * For writes to RAID[56], allow a full stripeset across
6031  * all disks. For other RAID types and for RAID[56]
6032  * reads, just allow a single stripe (on a single disk).
6033  */
6034 if (op == BTRFS_MAP_WRITE) {
6035 max_len = stripe_len * data_stripes -
6036 (offset - raid56_full_stripe_start);
6037 }
6038 }
6039 len = min_t(u64, em->len - offset, max_len);
6040 } else {
6041 len = em->len - offset;
6042 }
6043
6044 io_geom->len = len;
6045 io_geom->offset = offset;
6046 io_geom->stripe_len = stripe_len;
6047 io_geom->stripe_nr = stripe_nr;
6048 io_geom->stripe_offset = stripe_offset;
6049 io_geom->raid56_stripe_offset = raid56_full_stripe_start;
6050
6051 out:
6052
6053 free_extent_map(em);
6054 return ret;
6055 }
6056
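/*
 * Core mapping routine: translate a logical address into the set of
 * physical stripes (a btrfs_bio) that an I/O of the given op must touch,
 * taking mirrors, raid56 geometry and any running device replace into
 * account.
 */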
6057 static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
6058 enum btrfs_map_op op,
6059 u64 logical, u64 *length,
6060 struct btrfs_bio **bbio_ret,
6061 int mirror_num, int need_raid_map)
6062 {
6063 struct extent_map *em;
6064 struct map_lookup *map;
6065 u64 stripe_offset;
6066 u64 stripe_nr;
6067 u64 stripe_len;
6068 u32 stripe_index;
6069 int data_stripes;
6070 int i;
6071 int ret = 0;
6072 int num_stripes;
6073 int max_errors = 0;
6074 int tgtdev_indexes = 0;
6075 struct btrfs_bio *bbio = NULL;
6076 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
6077 int dev_replace_is_ongoing = 0;
6078 int num_alloc_stripes;
6079 int patch_the_first_stripe_for_dev_replace = 0;
6080 u64 physical_to_patch_in_first_stripe = 0;
6081 u64 raid56_full_stripe_start = (u64)-1;
6082 struct btrfs_io_geometry geom;
6083
6084 ASSERT(bbio_ret);
6085
6086 if (op == BTRFS_MAP_DISCARD)
6087 return __btrfs_map_block_for_discard(fs_info, logical,
6088 *length, bbio_ret);
6089
6090 ret = btrfs_get_io_geometry(fs_info, op, logical, *length, &geom);
6091 if (ret < 0)
6092 return ret;
6093
6094 em = btrfs_get_chunk_map(fs_info, logical, *length);
6095 ASSERT(!IS_ERR(em));
6096 map = em->map_lookup;
6097
6098 *length = geom.len;
6099 stripe_len = geom.stripe_len;
6100 stripe_nr = geom.stripe_nr;
6101 stripe_offset = geom.stripe_offset;
6102 raid56_full_stripe_start = geom.raid56_stripe_offset;
6103 data_stripes = nr_data_stripes(map);
6104
6105 down_read(&dev_replace->rwsem);
6106 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
6107
6108 /* Hold the semaphore for read during the whole operation, write is
6109  * requested at commit time but must wait.
6110  */
6111 if (!dev_replace_is_ongoing)
6112 up_read(&dev_replace->rwsem);
6113
6114 if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
6115 !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
6116 ret = get_extra_mirror_from_replace(fs_info, logical, *length,
6117 dev_replace->srcdev->devid,
6118 &mirror_num,
6119 &physical_to_patch_in_first_stripe);
6120 if (ret)
6121 goto out;
6122 else
6123 patch_the_first_stripe_for_dev_replace = 1;
6124 } else if (mirror_num > map->num_stripes) {
6125 mirror_num = 0;
6126 }
6127
6128 num_stripes = 1;
6129 stripe_index = 0;
6130 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
6131 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
6132 &stripe_index);
6133 if (!need_full_stripe(op))
6134 mirror_num = 1;
6135 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
6136 if (need_full_stripe(op))
6137 num_stripes = map->num_stripes;
6138 else if (mirror_num)
6139 stripe_index = mirror_num - 1;
6140 else {
6141 stripe_index = find_live_mirror(fs_info, map, 0,
6142 dev_replace_is_ongoing);
6143 mirror_num = stripe_index + 1;
6144 }
6145
6146 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
6147 if (need_full_stripe(op)) {
6148 num_stripes = map->num_stripes;
6149 } else if (mirror_num) {
6150 stripe_index = mirror_num - 1;
6151 } else {
6152 mirror_num = 1;
6153 }
6154
6155 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
6156 u32 factor = map->num_stripes / map->sub_stripes;
6157
6158 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
6159 stripe_index *= map->sub_stripes;
6160
6161 if (need_full_stripe(op))
6162 num_stripes = map->sub_stripes;
6163 else if (mirror_num)
6164 stripe_index += mirror_num - 1;
6165 else {
6166 int old_stripe_index = stripe_index;
6167 stripe_index = find_live_mirror(fs_info, map,
6168 stripe_index,
6169 dev_replace_is_ongoing);
6170 mirror_num = stripe_index - old_stripe_index + 1;
6171 }
6172
6173 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
6174 if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
6175 /* Push stripe_nr back to the start of the full stripe */
6176 stripe_nr = div64_u64(raid56_full_stripe_start,
6177 stripe_len * data_stripes);
6178
6179 /* RAID[56] write or recovery. Return all stripes */
6180 num_stripes = map->num_stripes;
6181 max_errors = nr_parity_stripes(map);
6182 /* Limit the mapped length to a single stripe */
6183 *length = map->stripe_len;
6184 stripe_index = 0;
6185 stripe_offset = 0;
6186 } else {
6187 /*
6188  * Mirror #0 or #1 means the original data block.
6189  * Mirror #2 is RAID5 parity block.
6190  * Mirror #3 is RAID6 Q block.
6191  */
6192 stripe_nr = div_u64_rem(stripe_nr,
6193 data_stripes, &stripe_index);
6194 if (mirror_num > 1)
6195 stripe_index = data_stripes + mirror_num - 2;
6196
6197 /* We distribute the parity blocks across stripes */
6198 div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
6199 &stripe_index);
6200 if (!need_full_stripe(op) && mirror_num <= 1)
6201 mirror_num = 1;
6202 }
6203 } else {
6204 /*
6205  * After this, stripe_nr is the number of stripes on this
6206  * device we have to walk to find the data, and stripe_index is
6207  * the number of our device in the stripe array
6208  */
6209 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
6210 &stripe_index);
6211 mirror_num = stripe_index + 1;
6212 }
6213 if (stripe_index >= map->num_stripes) {
6214 btrfs_crit(fs_info,
6215 "stripe index math went horribly wrong, got stripe_index=%u, num_stripes=%u",
6216 stripe_index, map->num_stripes);
6217 ret = -EINVAL;
6218 goto out;
6219 }
6220
6221 num_alloc_stripes = num_stripes;
6222 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
6223 if (op == BTRFS_MAP_WRITE)
6224 num_alloc_stripes <<= 1;
6225 if (op == BTRFS_MAP_GET_READ_MIRRORS)
6226 num_alloc_stripes++;
6227 tgtdev_indexes = num_stripes;
6228 }
6229
6230 bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
6231 if (!bbio) {
6232 ret = -ENOMEM;
6233 goto out;
6234 }
6235 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
6236 bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
6237
6238 /* Build raid_map */
6239 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
6240 (need_full_stripe(op) || mirror_num > 1)) {
6241 u64 tmp;
6242 unsigned rot;
6243
6244 bbio->raid_map = (u64 *)((void *)bbio->stripes +
6245 sizeof(struct btrfs_bio_stripe) *
6246 num_alloc_stripes +
6247 sizeof(int) * tgtdev_indexes);
6248
6249 /* Work out the disk rotation on this stripe-set */
6250 div_u64_rem(stripe_nr, num_stripes, &rot);
6251
6252 /* Fill in the logical address of each stripe */
6253 tmp = stripe_nr * data_stripes;
6254 for (i = 0; i < data_stripes; i++)
6255 bbio->raid_map[(i+rot) % num_stripes] =
6256 em->start + (tmp + i) * map->stripe_len;
6257
6258 bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
6259 if (map->type & BTRFS_BLOCK_GROUP_RAID6)
6260 bbio->raid_map[(i+rot+1) % num_stripes] =
6261 RAID6_Q_STRIPE;
6262 }
6263
6264
6265 for (i = 0; i < num_stripes; i++) {
6266 bbio->stripes[i].physical =
6267 map->stripes[stripe_index].physical +
6268 stripe_offset +
6269 stripe_nr * map->stripe_len;
6270 bbio->stripes[i].dev =
6271 map->stripes[stripe_index].dev;
6272 stripe_index++;
6273 }
6274
6275 if (need_full_stripe(op))
6276 max_errors = btrfs_chunk_max_errors(map);
6277
6278 if (bbio->raid_map)
6279 sort_parity_stripes(bbio, num_stripes);
6280
6281 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
6282 need_full_stripe(op)) {
6283 handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
6284 &max_errors);
6285 }
6286
6287 *bbio_ret = bbio;
6288 bbio->map_type = map->type;
6289 bbio->num_stripes = num_stripes;
6290 bbio->max_errors = max_errors;
6291 bbio->mirror_num = mirror_num;
6292
6293 /*
6294  * This is the case that REQ_READ && dev_replace_is_ongoing &&
6295  * mirror_num == num_stripes + 1 && dev_replace target drive is
6296  * available as a mirror
6297  */
6298 if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
6299 WARN_ON(num_stripes > 1);
6300 bbio->stripes[0].dev = dev_replace->tgtdev;
6301 bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
6302 bbio->mirror_num = map->num_stripes + 1;
6303 }
6304 out:
6305 if (dev_replace_is_ongoing) {
6306 lockdep_assert_held(&dev_replace->rwsem);
6307 /* Unlock and let waiting writers proceed */
6308 up_read(&dev_replace->rwsem);
6309 }
6310 free_extent_map(em);
6311 return ret;
6312 }
6313
6314 int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6315 u64 logical, u64 *length,
6316 struct btrfs_bio **bbio_ret, int mirror_num)
6317 {
6318 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
6319 mirror_num, 0);
6320 }
6321
6322 /* For Scrub/replace */
6323 int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6324 u64 logical, u64 *length,
6325 struct btrfs_bio **bbio_ret)
6326 {
6327 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
6328 }
6329
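/*
 * Reverse mapping: given a physical offset on some device inside the
 * chunk at @chunk_start, compute the logical addresses that map to it.
 */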
6330 int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
6331 u64 physical, u64 **logical, int *naddrs, int *stripe_len)
6332 {
6333 struct extent_map *em;
6334 struct map_lookup *map;
6335 u64 *buf;
6336 u64 bytenr;
6337 u64 length;
6338 u64 stripe_nr;
6339 u64 rmap_len;
6340 int i, j, nr = 0;
6341
6342 em = btrfs_get_chunk_map(fs_info, chunk_start, 1);
6343 if (IS_ERR(em))
6344 return -EIO;
6345
6346 map = em->map_lookup;
6347 length = em->len;
6348 rmap_len = map->stripe_len;
6349
6350 if (map->type & BTRFS_BLOCK_GROUP_RAID10)
6351 length = div_u64(length, map->num_stripes / map->sub_stripes);
6352 else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
6353 length = div_u64(length, map->num_stripes);
6354 else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
6355 length = div_u64(length, nr_data_stripes(map));
6356 rmap_len = map->stripe_len * nr_data_stripes(map);
6357 }
6358
6359 buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
6360 BUG_ON(!buf);
6361
6362 for (i = 0; i < map->num_stripes; i++) {
6363 if (map->stripes[i].physical > physical ||
6364 map->stripes[i].physical + length <= physical)
6365 continue;
6366
6367 stripe_nr = physical - map->stripes[i].physical;
6368 stripe_nr = div64_u64(stripe_nr, map->stripe_len);
6369
6370 if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
6371 stripe_nr = stripe_nr * map->num_stripes + i;
6372 stripe_nr = div_u64(stripe_nr, map->sub_stripes);
6373 } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
6374 stripe_nr = stripe_nr * map->num_stripes + i;
6375 }
6376
6377
6378
6379 bytenr = chunk_start + stripe_nr * rmap_len;
6380 WARN_ON(nr >= map->num_stripes);
6381 for (j = 0; j < nr; j++) {
6382 if (buf[j] == bytenr)
6383 break;
6384 }
6385 if (j == nr) {
6386 WARN_ON(nr >= map->num_stripes);
6387 buf[nr++] = bytenr;
6388 }
6389 }
6390
6391 *logical = buf;
6392 *naddrs = nr;
6393 *stripe_len = rmap_len;
6394
6395 free_extent_map(em);
6396 return 0;
6397 }
6398
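/*
 * Restore the bio's original private/end_io, complete it, and drop our
 * reference on the bbio.
 */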
6399 static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
6400 {
6401 bio->bi_private = bbio->private;
6402 bio->bi_end_io = bbio->end_io;
6403 bio_endio(bio);
6404
6405 btrfs_put_bbio(bbio);
6406 }
6407
6408 static void btrfs_end_bio(struct bio *bio)
6409 {
6410 struct btrfs_bio *bbio = bio->bi_private;
6411 int is_orig_bio = 0;
6412
6413 if (bio->bi_status) {
6414 atomic_inc(&bbio->error);
6415 if (bio->bi_status == BLK_STS_IOERR ||
6416 bio->bi_status == BLK_STS_TARGET) {
6417 unsigned int stripe_index =
6418 btrfs_io_bio(bio)->stripe_index;
6419 struct btrfs_device *dev;
6420
6421 BUG_ON(stripe_index >= bbio->num_stripes);
6422 dev = bbio->stripes[stripe_index].dev;
6423 if (dev->bdev) {
6424 if (bio_op(bio) == REQ_OP_WRITE)
6425 btrfs_dev_stat_inc_and_print(dev,
6426 BTRFS_DEV_STAT_WRITE_ERRS);
6427 else if (!(bio->bi_opf & REQ_RAHEAD))
6428 btrfs_dev_stat_inc_and_print(dev,
6429 BTRFS_DEV_STAT_READ_ERRS);
6430 if (bio->bi_opf & REQ_PREFLUSH)
6431 btrfs_dev_stat_inc_and_print(dev,
6432 BTRFS_DEV_STAT_FLUSH_ERRS);
6433 }
6434 }
6435 }
6436
6437 if (bio == bbio->orig_bio)
6438 is_orig_bio = 1;
6439
6440 btrfs_bio_counter_dec(bbio->fs_info);
6441
6442 if (atomic_dec_and_test(&bbio->stripes_pending)) {
6443 if (!is_orig_bio) {
6444 bio_put(bio);
6445 bio = bbio->orig_bio;
6446 }
6447
6448 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6449
6450 /* Only send an error to the higher layers if it is beyond
6451  * the tolerance of the btrfs bio */
6452 if (atomic_read(&bbio->error) > bbio->max_errors) {
6453 bio->bi_status = BLK_STS_IOERR;
6454 } else {
6455 /*
6456  * This bio is actually up to date, we didn't
6457  * go over the max number of errors
6458  */
6459 bio->bi_status = BLK_STS_OK;
6460 }
6461
6462 btrfs_end_bbio(bbio, bio);
6463 } else if (!is_orig_bio) {
6464 bio_put(bio);
6465 }
6466 }
6467
6468 /*
6469  * See run_scheduled_bios for a description of why bios are collected for
6470  * async submit.
6471  *
6472  * This will add one bio to the pending list for a device and make sure
6473  * the work struct is scheduled.
6474  */
6475 static noinline void btrfs_schedule_bio(struct btrfs_device *device,
6476 struct bio *bio)
6477 {
6478 struct btrfs_fs_info *fs_info = device->fs_info;
6479 int should_queue = 1;
6480 struct btrfs_pending_bios *pending_bios;
6481
6482 /* Don't bother with additional async steps for reads, right now */
6483 if (bio_op(bio) == REQ_OP_READ) {
6484 btrfsic_submit_bio(bio);
6485 return;
6486 }
6487
6488 WARN_ON(bio->bi_next);
6489 bio->bi_next = NULL;
6490
6491 spin_lock(&device->io_lock);
6492 if (op_is_sync(bio->bi_opf))
6493 pending_bios = &device->pending_sync_bios;
6494 else
6495 pending_bios = &device->pending_bios;
6496
6497 if (pending_bios->tail)
6498 pending_bios->tail->bi_next = bio;
6499
6500 pending_bios->tail = bio;
6501 if (!pending_bios->head)
6502 pending_bios->head = bio;
6503 if (device->running_pending)
6504 should_queue = 0;
6505
6506 spin_unlock(&device->io_lock);
6507
6508 if (should_queue)
6509 btrfs_queue_work(fs_info->submit_workers, &device->work);
6510 }
6511
6512 static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
6513 u64 physical, int dev_nr, int async)
6514 {
6515 struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
6516 struct btrfs_fs_info *fs_info = bbio->fs_info;
6517
6518 bio->bi_private = bbio;
6519 btrfs_io_bio(bio)->stripe_index = dev_nr;
6520 bio->bi_end_io = btrfs_end_bio;
6521 bio->bi_iter.bi_sector = physical >> 9;
6522 btrfs_debug_in_rcu(fs_info,
6523 "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
6524 bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
6525 (u_long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid,
6526 bio->bi_iter.bi_size);
6527 bio_set_dev(bio, dev->bdev);
6528
6529 btrfs_bio_counter_inc_noblocked(fs_info);
6530
6531 if (async)
6532 btrfs_schedule_bio(dev, bio);
6533 else
6534 btrfsic_submit_bio(bio);
6535 }
6536
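/*
 * Account a stripe that could not even be submitted; if it was the last
 * pending stripe, complete the original bio with the overall status.
 */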
6537 static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
6538 {
6539 atomic_inc(&bbio->error);
6540 if (atomic_dec_and_test(&bbio->stripes_pending)) {
6541 /* Should be the original bio. */
6542 WARN_ON(bio != bbio->orig_bio);
6543
6544 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6545 bio->bi_iter.bi_sector = logical >> 9;
6546 if (atomic_read(&bbio->error) > bbio->max_errors)
6547 bio->bi_status = BLK_STS_IOERR;
6548 else
6549 bio->bi_status = BLK_STS_OK;
6550 btrfs_end_bbio(bbio, bio);
6551 }
6552 }
6553
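/*
 * Map @bio to its physical stripes and submit a (possibly cloned) bio to
 * each device; raid56 writes and recovery reads are handed off to the
 * parity code instead.
 */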
6554 blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
6555 int mirror_num, int async_submit)
6556 {
6557 struct btrfs_device *dev;
6558 struct bio *first_bio = bio;
6559 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
6560 u64 length = 0;
6561 u64 map_length;
6562 int ret;
6563 int dev_nr;
6564 int total_devs;
6565 struct btrfs_bio *bbio = NULL;
6566
6567 length = bio->bi_iter.bi_size;
6568 map_length = length;
6569
6570 btrfs_bio_counter_inc_blocked(fs_info);
6571 ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
6572 &map_length, &bbio, mirror_num, 1);
6573 if (ret) {
6574 btrfs_bio_counter_dec(fs_info);
6575 return errno_to_blk_status(ret);
6576 }
6577
6578 total_devs = bbio->num_stripes;
6579 bbio->orig_bio = first_bio;
6580 bbio->private = first_bio->bi_private;
6581 bbio->end_io = first_bio->bi_end_io;
6582 bbio->fs_info = fs_info;
6583 atomic_set(&bbio->stripes_pending, bbio->num_stripes);
6584
6585 if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
6586 ((bio_op(bio) == REQ_OP_WRITE) || (mirror_num > 1))) {
6587 /* In this case, map_length has been set to the length of
6588  * a single stripe; not the whole write */
6589 if (bio_op(bio) == REQ_OP_WRITE) {
6590 ret = raid56_parity_write(fs_info, bio, bbio,
6591 map_length);
6592 } else {
6593 ret = raid56_parity_recover(fs_info, bio, bbio,
6594 map_length, mirror_num, 1);
6595 }
6596
6597 btrfs_bio_counter_dec(fs_info);
6598 return errno_to_blk_status(ret);
6599 }
6600
6601 if (map_length < length) {
6602 btrfs_crit(fs_info,
6603 "mapping failed logical %llu bio len %llu len %llu",
6604 logical, length, map_length);
6605 BUG();
6606 }
6607
6608 for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
6609 dev = bbio->stripes[dev_nr].dev;
6610 if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
6611 &dev->dev_state) ||
6612 (bio_op(first_bio) == REQ_OP_WRITE &&
6613 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
6614 bbio_error(bbio, first_bio, logical);
6615 continue;
6616 }
6617
6618 if (dev_nr < total_devs - 1)
6619 bio = btrfs_bio_clone(first_bio);
6620 else
6621 bio = first_bio;
6622
6623 submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical,
6624 dev_nr, async_submit);
6625 }
6626 btrfs_bio_counter_dec(fs_info);
6627 return BLK_STS_OK;
6628 }
6629
6630 /*
6631  * Find a device specified by @devid or @uuid in the list of @fs_devices, or
6632  * return NULL.
6633  *
6634  * If devid and uuid are both specified, the match must be exact, otherwise
6635  * only devid is used.
6636  *
6637  * If @seed is true, traverse through the seed devices.
6638  */
6639 struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
6640 u64 devid, u8 *uuid, u8 *fsid,
6641 bool seed)
6642 {
6643 struct btrfs_device *device;
6644
6645 while (fs_devices) {
6646 if (!fsid ||
6647 !memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
6648 list_for_each_entry(device, &fs_devices->devices,
6649 dev_list) {
6650 if (device->devid == devid &&
6651 (!uuid || memcmp(device->uuid, uuid,
6652 BTRFS_UUID_SIZE) == 0))
6653 return device;
6654 }
6655 }
6656 if (seed)
6657 fs_devices = fs_devices->seed;
6658 else
6659 return NULL;
6660 }
6661 return NULL;
6662 }
6663
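/*
 * Register a device that is referenced by metadata but absent at mount
 * time, so the rest of the code can treat it as a (missing) member.
 */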
6664 static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
6665 u64 devid, u8 *dev_uuid)
6666 {
6667 struct btrfs_device *device;
6668
6669 device = btrfs_alloc_device(NULL, &devid, dev_uuid);
6670 if (IS_ERR(device))
6671 return device;
6672
6673 list_add(&device->dev_list, &fs_devices->devices);
6674 device->fs_devices = fs_devices;
6675 fs_devices->num_devices++;
6676
6677 set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
6678 fs_devices->missing_devices++;
6679
6680 return device;
6681 }
6682
6683 /**
6684  * btrfs_alloc_device - allocate struct btrfs_device
6685  * @fs_info:	used only for generating a new devid, can be NULL if
6686  *		devid is provided (i.e. @devid != NULL).
6687  * @devid:	a pointer to devid for this device.  If NULL a new devid
6688  *		is generated.
6689  * @uuid:	a pointer to UUID for this device.  If NULL a new UUID
6690  *		is generated.
6691  *
6692  * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
6693  * on error.  Returned struct is not linked onto any lists and must be
6694  * destroyed with btrfs_free_device.
6695  */
6696 struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
6697 const u64 *devid,
6698 const u8 *uuid)
6699 {
6700 struct btrfs_device *dev;
6701 u64 tmp;
6702
6703 if (WARN_ON(!devid && !fs_info))
6704 return ERR_PTR(-EINVAL);
6705
6706 dev = __alloc_device();
6707 if (IS_ERR(dev))
6708 return dev;
6709
6710 if (devid)
6711 tmp = *devid;
6712 else {
6713 int ret;
6714
6715 ret = find_next_devid(fs_info, &tmp);
6716 if (ret) {
6717 btrfs_free_device(dev);
6718 return ERR_PTR(ret);
6719 }
6720 }
6721 dev->devid = tmp;
6722
6723 if (uuid)
6724 memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
6725 else
6726 generate_random_uuid(dev->uuid);
6727
6728 btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL);
6729
6730 return dev;
6731 }
6732
static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
					u64 devid, u8 *uuid, bool error)
{
	if (error)
		btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing",
			     devid, uuid);
	else
		btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing",
			      devid, uuid);
}

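/*
 * Return the length of a chunk's stripe on a single device: the chunk length
 * divided by the number of data stripes (parity stripes of RAID5/6 carry no
 * data of their own).
 */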
static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
{
	int index = btrfs_bg_flags_to_raid_index(type);
	int ncopies = btrfs_raid_array[index].ncopies;
	int data_stripes;

	switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
	case BTRFS_BLOCK_GROUP_RAID5:
		data_stripes = num_stripes - 1;
		break;
	case BTRFS_BLOCK_GROUP_RAID6:
		data_stripes = num_stripes - 2;
		break;
	default:
		data_stripes = num_stripes / ncopies;
		break;
	}
	return div_u64(chunk_len, data_stripes);
}

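/*
 * Turn an on-disk chunk item into an extent_map/map_lookup pair and insert it
 * into the logical->physical mapping tree.  A stripe whose device cannot be
 * found either fails the read (-ENOENT) or, on a DEGRADED mount, gets a
 * placeholder device from add_missing_dev().
 */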
static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
			  struct btrfs_chunk *chunk)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct extent_map_tree *map_tree = &fs_info->mapping_tree;
	struct map_lookup *map;
	struct extent_map *em;
	u64 logical;
	u64 length;
	u64 devid;
	u8 uuid[BTRFS_UUID_SIZE];
	int num_stripes;
	int ret;
	int i;

	logical = key->offset;
	length = btrfs_chunk_length(leaf, chunk);
	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);

	/*
	 * Only need to verify chunk item if we're reading from sys chunk array,
	 * as chunk item in tree block is already verified by tree-checker.
	 */
	if (leaf->start == BTRFS_SUPER_INFO_OFFSET) {
		ret = btrfs_check_chunk_valid(leaf, chunk, logical);
		if (ret)
			return ret;
	}

	read_lock(&map_tree->lock);
	em = lookup_extent_mapping(map_tree, logical, 1);
	read_unlock(&map_tree->lock);

	/* already mapped? */
	if (em && em->start <= logical && em->start + em->len > logical) {
		free_extent_map(em);
		return 0;
	} else if (em) {
		free_extent_map(em);
	}

	em = alloc_extent_map();
	if (!em)
		return -ENOMEM;
	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
	if (!map) {
		free_extent_map(em);
		return -ENOMEM;
	}

	set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
	em->map_lookup = map;
	em->start = logical;
	em->len = length;
	em->orig_start = 0;
	em->block_start = 0;
	em->block_len = em->len;

	map->num_stripes = num_stripes;
	map->io_width = btrfs_chunk_io_width(leaf, chunk);
	map->io_align = btrfs_chunk_io_align(leaf, chunk);
	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
	map->type = btrfs_chunk_type(leaf, chunk);
	map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
	map->verified_stripes = 0;
	em->orig_block_len = calc_stripe_length(map->type, em->len,
						map->num_stripes);
	for (i = 0; i < num_stripes; i++) {
		map->stripes[i].physical =
			btrfs_stripe_offset_nr(leaf, chunk, i);
		devid = btrfs_stripe_devid_nr(leaf, chunk, i);
		read_extent_buffer(leaf, uuid, (unsigned long)
				   btrfs_stripe_dev_uuid_nr(chunk, i),
				   BTRFS_UUID_SIZE);
		map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices,
							devid, uuid, NULL, true);
		if (!map->stripes[i].dev &&
		    !btrfs_test_opt(fs_info, DEGRADED)) {
			free_extent_map(em);
			btrfs_report_missing_device(fs_info, devid, uuid, true);
			return -ENOENT;
		}
		if (!map->stripes[i].dev) {
			map->stripes[i].dev =
				add_missing_dev(fs_info->fs_devices, devid,
						uuid);
			if (IS_ERR(map->stripes[i].dev)) {
				free_extent_map(em);
				btrfs_err(fs_info,
					"failed to init missing dev %llu: %ld",
					devid, PTR_ERR(map->stripes[i].dev));
				return PTR_ERR(map->stripes[i].dev);
			}
			btrfs_report_missing_device(fs_info, devid, uuid, false);
		}
		set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
			&(map->stripes[i].dev->dev_state));
	}

	write_lock(&map_tree->lock);
	ret = add_extent_mapping(map_tree, em, 0);
	write_unlock(&map_tree->lock);
	if (ret < 0) {
		btrfs_err(fs_info,
			  "failed to add chunk map, start=%llu len=%llu: %d",
			  em->start, em->len, ret);
	}
	free_extent_map(em);

	return ret;
}

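/*
 * Initialize the in-memory btrfs_device from its on-disk dev item: sizes,
 * usage, I/O parameters and the device UUID.
 */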
static void fill_device_from_item(struct extent_buffer *leaf,
				  struct btrfs_dev_item *dev_item,
				  struct btrfs_device *device)
{
	unsigned long ptr;

	device->devid = btrfs_device_id(leaf, dev_item);
	device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
	device->total_bytes = device->disk_total_bytes;
	device->commit_total_bytes = device->disk_total_bytes;
	device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
	device->commit_bytes_used = device->bytes_used;
	device->type = btrfs_device_type(leaf, dev_item);
	device->io_align = btrfs_device_io_align(leaf, dev_item);
	device->io_width = btrfs_device_io_width(leaf, dev_item);
	device->sector_size = btrfs_device_sector_size(leaf, dev_item);
	WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
	clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);

	ptr = btrfs_device_uuid(dev_item);
	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
}

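/*
 * Find the fs_devices of the seed filesystem @fsid, opening and cloning it if
 * it is not already linked into fs_info->fs_devices->seed.  On a DEGRADED
 * mount a seed that cannot be found is represented by an empty, "opened"
 * fs_devices so the mount can continue.
 */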
static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
						  u8 *fsid)
{
	struct btrfs_fs_devices *fs_devices;
	int ret;

	lockdep_assert_held(&uuid_mutex);
	ASSERT(fsid);

	fs_devices = fs_info->fs_devices->seed;
	while (fs_devices) {
		if (!memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE))
			return fs_devices;

		fs_devices = fs_devices->seed;
	}

	fs_devices = find_fsid(fsid, NULL);
	if (!fs_devices) {
		if (!btrfs_test_opt(fs_info, DEGRADED))
			return ERR_PTR(-ENOENT);

		fs_devices = alloc_fs_devices(fsid, NULL);
		if (IS_ERR(fs_devices))
			return fs_devices;

		fs_devices->seeding = 1;
		fs_devices->opened = 1;
		return fs_devices;
	}

	fs_devices = clone_fs_devices(fs_devices);
	if (IS_ERR(fs_devices))
		return fs_devices;

	ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder);
	if (ret) {
		free_fs_devices(fs_devices);
		fs_devices = ERR_PTR(ret);
		goto out;
	}

	if (!fs_devices->seeding) {
		close_fs_devices(fs_devices);
		free_fs_devices(fs_devices);
		fs_devices = ERR_PTR(-EINVAL);
		goto out;
	}

	fs_devices->seed = fs_info->fs_devices->seed;
	fs_info->fs_devices->seed = fs_devices;
out:
	return fs_devices;
}

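/*
 * Process one dev item from the chunk tree: locate (or, on a DEGRADED mount,
 * fabricate) the matching btrfs_device, move it to the right fs_devices if
 * needed, and fill it in from the on-disk item.
 */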
static int read_one_dev(struct extent_buffer *leaf,
			struct btrfs_dev_item *dev_item)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	u64 devid;
	u8 fs_uuid[BTRFS_FSID_SIZE];
	u8 dev_uuid[BTRFS_UUID_SIZE];

	devid = btrfs_device_id(leaf, dev_item);
	read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
			   BTRFS_UUID_SIZE);
	read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
			   BTRFS_FSID_SIZE);

	if (memcmp(fs_uuid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE)) {
		fs_devices = open_seed_devices(fs_info, fs_uuid);
		if (IS_ERR(fs_devices))
			return PTR_ERR(fs_devices);
	}

	device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
				   fs_uuid, true);
	if (!device) {
		if (!btrfs_test_opt(fs_info, DEGRADED)) {
			btrfs_report_missing_device(fs_info, devid,
						    dev_uuid, true);
			return -ENOENT;
		}

		device = add_missing_dev(fs_devices, devid, dev_uuid);
		if (IS_ERR(device)) {
			btrfs_err(fs_info,
				  "failed to add missing dev %llu: %ld",
				  devid, PTR_ERR(device));
			return PTR_ERR(device);
		}
		btrfs_report_missing_device(fs_info, devid, dev_uuid, false);
	} else {
		if (!device->bdev) {
			if (!btrfs_test_opt(fs_info, DEGRADED)) {
				btrfs_report_missing_device(fs_info,
						devid, dev_uuid, true);
				return -ENOENT;
			}
			btrfs_report_missing_device(fs_info, devid,
						    dev_uuid, false);
		}

		if (!device->bdev &&
		    !test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
			/*
			 * this happens when a device that was properly setup
			 * in the device info lists suddenly goes bad.
			 * device->bdev is NULL, and so we have to set
			 * device->missing to one here
			 */
			device->fs_devices->missing_devices++;
			set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
		}

		/* Move the device to its own fs_devices */
		if (device->fs_devices != fs_devices) {
			ASSERT(test_bit(BTRFS_DEV_STATE_MISSING,
							&device->dev_state));

			list_move(&device->dev_list, &fs_devices->devices);
			device->fs_devices->num_devices--;
			fs_devices->num_devices++;

			device->fs_devices->missing_devices--;
			fs_devices->missing_devices++;

			device->fs_devices = fs_devices;
		}
	}

	if (device->fs_devices != fs_info->fs_devices) {
		BUG_ON(test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state));
		if (device->generation !=
		    btrfs_device_generation(leaf, dev_item))
			return -EINVAL;
	}

	fill_device_from_item(leaf, dev_item, device);
	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		device->fs_devices->total_rw_bytes += device->total_bytes;
		atomic64_add(device->total_bytes - device->bytes_used,
				&fs_info->free_chunk_space);
	}
	return 0;
}

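/*
 * Read the system chunk array from the superblock copy into a temporary
 * extent buffer and create mappings for the SYSTEM chunks it describes, so
 * that the chunk tree itself can be read afterwards.
 */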
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_super_block *super_copy = fs_info->super_copy;
	struct extent_buffer *sb;
	struct btrfs_disk_key *disk_key;
	struct btrfs_chunk *chunk;
	u8 *array_ptr;
	unsigned long sb_array_offset;
	int ret = 0;
	u32 num_stripes;
	u32 array_size;
	u32 len = 0;
	u32 cur_offset;
	u64 type;
	struct btrfs_key key;

	ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize);
	/*
	 * This will create an extent buffer of nodesize, while superblock
	 * size is fixed to BTRFS_SUPER_INFO_SIZE.  If nodesize > sb size
	 * this overallocates, but only the first page is actually used.
	 */
	sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET);
	if (IS_ERR(sb))
		return PTR_ERR(sb);
	set_extent_buffer_uptodate(sb);
	btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
	/*
	 * The sb extent buffer is artificial and just used to read the
	 * system array.  set_extent_buffer_uptodate() does not mark the
	 * page up to date when the page is larger than the extent buffer,
	 * so do it explicitly for the first (and only used) page.
	 */
	if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE)
		SetPageUptodate(sb->pages[0]);

	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
	array_size = btrfs_super_sys_array_size(super_copy);

	array_ptr = super_copy->sys_chunk_array;
	sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
	cur_offset = 0;

	while (cur_offset < array_size) {
		disk_key = (struct btrfs_disk_key *)array_ptr;
		len = sizeof(*disk_key);
		if (cur_offset + len > array_size)
			goto out_short_read;

		btrfs_disk_key_to_cpu(&key, disk_key);

		array_ptr += len;
		sb_array_offset += len;
		cur_offset += len;

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			chunk = (struct btrfs_chunk *)sb_array_offset;
			/*
			 * At least one btrfs_chunk with one stripe must be
			 * present, exact stripe count check comes afterwards
			 */
			len = btrfs_chunk_item_size(1);
			if (cur_offset + len > array_size)
				goto out_short_read;

			num_stripes = btrfs_chunk_num_stripes(sb, chunk);
			if (!num_stripes) {
				btrfs_err(fs_info,
				"invalid number of stripes %u in sys_array at offset %u",
					  num_stripes, cur_offset);
				ret = -EIO;
				break;
			}

			type = btrfs_chunk_type(sb, chunk);
			if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
				btrfs_err(fs_info,
				"invalid chunk type %llu in sys_array at offset %u",
					  type, cur_offset);
				ret = -EIO;
				break;
			}

			len = btrfs_chunk_item_size(num_stripes);
			if (cur_offset + len > array_size)
				goto out_short_read;

			ret = read_one_chunk(&key, sb, chunk);
			if (ret)
				break;
		} else {
			btrfs_err(fs_info,
				"unexpected item type %u in sys_array at offset %u",
				(u32)key.type, cur_offset);
			ret = -EIO;
			break;
		}
		array_ptr += len;
		sb_array_offset += len;
		cur_offset += len;
	}
	clear_extent_buffer_uptodate(sb);
	free_extent_buffer_stale(sb);
	return ret;

out_short_read:
	btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u",
			len, cur_offset);
	clear_extent_buffer_uptodate(sb);
	free_extent_buffer_stale(sb);
	return -EIO;
}

/*
 * Check if all chunks in the fs are OK for read-write degraded mount
 *
 * If the @failing_dev is specified, it's accounted as missing.
 *
 * Return true if all chunks meet the minimal RW mount requirements.
 * Return false if any chunk doesn't meet minimal RW mount requirements.
 */
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
			       struct btrfs_device *failing_dev)
{
	struct extent_map_tree *map_tree = &fs_info->mapping_tree;
	struct extent_map *em;
	u64 next_start = 0;
	bool ret = true;

	read_lock(&map_tree->lock);
	em = lookup_extent_mapping(map_tree, 0, (u64)-1);
	read_unlock(&map_tree->lock);
	/* No chunk at all? Return false anyway */
	if (!em) {
		ret = false;
		goto out;
	}
	while (em) {
		struct map_lookup *map;
		int missing = 0;
		int max_tolerated;
		int i;

		map = em->map_lookup;
		max_tolerated =
			btrfs_get_num_tolerated_disk_barrier_failures(
					map->type);
		for (i = 0; i < map->num_stripes; i++) {
			struct btrfs_device *dev = map->stripes[i].dev;

			if (!dev || !dev->bdev ||
			    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
			    dev->last_flush_error)
				missing++;
			else if (failing_dev && failing_dev == dev)
				missing++;
		}
		if (missing > max_tolerated) {
			if (!failing_dev)
				btrfs_warn(fs_info,
	"chunk %llu missing %d devices, max tolerance is %d for writable mount",
				   em->start, missing, max_tolerated);
			free_extent_map(em);
			ret = false;
			goto out;
		}
		next_start = extent_map_end(em);
		free_extent_map(em);

		read_lock(&map_tree->lock);
		em = lookup_extent_mapping(map_tree, next_start,
					   (u64)(-1) - next_start);
		read_unlock(&map_tree->lock);
	}
out:
	return ret;
}

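/*
 * Populate device and chunk state from the chunk tree at mount time, then
 * cross-check the totals found against the superblock.
 */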
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->chunk_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	int ret;
	int slot;
	u64 total_dev = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * uuid_mutex is needed only if we are mounting a sprout FS
	 * otherwise we don't need it.
	 */
	mutex_lock(&uuid_mutex);
	mutex_lock(&fs_info->chunk_mutex);

	/*
	 * Read all device items, and then all the chunk items. All
	 * device items are found before any chunk item (their object id
	 * is smaller than the lowest possible object id for a chunk
	 * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
	 */
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.offset = 0;
	key.type = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;
	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
			break;
		}
		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (found_key.type == BTRFS_DEV_ITEM_KEY) {
			struct btrfs_dev_item *dev_item;
			dev_item = btrfs_item_ptr(leaf, slot,
						  struct btrfs_dev_item);
			ret = read_one_dev(leaf, dev_item);
			if (ret)
				goto error;
			total_dev++;
		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
			struct btrfs_chunk *chunk;
			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
			ret = read_one_chunk(&found_key, leaf, chunk);
			if (ret)
				goto error;
		}
		path->slots[0]++;
	}

	/*
	 * After loading chunk tree, we've got all device information,
	 * do another round of validation checks.
	 */
	if (total_dev != fs_info->fs_devices->total_devices) {
		btrfs_err(fs_info,
	   "super_num_devices %llu mismatch with num_devices %llu found here",
			  btrfs_super_num_devices(fs_info->super_copy),
			  total_dev);
		ret = -EINVAL;
		goto error;
	}
	if (btrfs_super_total_bytes(fs_info->super_copy) <
	    fs_info->fs_devices->total_rw_bytes) {
		btrfs_err(fs_info,
	"super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
			  btrfs_super_total_bytes(fs_info->super_copy),
			  fs_info->fs_devices->total_rw_bytes);
		ret = -EINVAL;
		goto error;
	}
	ret = 0;
error:
	mutex_unlock(&fs_info->chunk_mutex);
	mutex_unlock(&uuid_mutex);

	btrfs_free_path(path);
	return ret;
}

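/*
 * Set the fs_info back-pointer on every device of this filesystem and of its
 * seed filesystems, once fs_info is fully set up.
 */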
void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;

	while (fs_devices) {
		mutex_lock(&fs_devices->device_list_mutex);
		list_for_each_entry(device, &fs_devices->devices, dev_list)
			device->fs_info = fs_info;
		mutex_unlock(&fs_devices->device_list_mutex);

		fs_devices = fs_devices->seed;
	}
}

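/*
 * Helpers to read and write one u64 counter of an on-disk dev_stats item;
 * the counters live in the values[] array inside the item.
 */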
static u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
				 const struct btrfs_dev_stats_item *ptr,
				 int index)
{
	u64 val;

	read_extent_buffer(eb, &val,
			   offsetof(struct btrfs_dev_stats_item, values) +
			    ((unsigned long)ptr) + (index * sizeof(u64)),
			   sizeof(val));
	return val;
}

static void btrfs_set_dev_stats_value(struct extent_buffer *eb,
				      struct btrfs_dev_stats_item *ptr,
				      int index, u64 val)
{
	write_extent_buffer(eb, &val,
			    offsetof(struct btrfs_dev_stats_item, values) +
			     ((unsigned long)ptr) + (index * sizeof(u64)),
			    sizeof(val));
}

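/*
 * Load persistent per-device error counters from the device tree at mount
 * time; a device without a dev_stats item starts with all counters at zero.
 */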
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
{
	struct btrfs_key key;
	struct btrfs_root *dev_root = fs_info->dev_root;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct extent_buffer *eb;
	int slot;
	int ret = 0;
	struct btrfs_device *device;
	struct btrfs_path *path = NULL;
	int i;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		int item_size;
		struct btrfs_dev_stats_item *ptr;

		key.objectid = BTRFS_DEV_STATS_OBJECTID;
		key.type = BTRFS_PERSISTENT_ITEM_KEY;
		key.offset = device->devid;
		ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
		if (ret) {
			for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
				btrfs_dev_stat_set(device, i, 0);
			device->dev_stats_valid = 1;
			btrfs_release_path(path);
			continue;
		}
		slot = path->slots[0];
		eb = path->nodes[0];
		item_size = btrfs_item_size_nr(eb, slot);

		ptr = btrfs_item_ptr(eb, slot,
				     struct btrfs_dev_stats_item);

		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
			if (item_size >= (1 + i) * sizeof(__le64))
				btrfs_dev_stat_set(device, i,
					btrfs_dev_stats_value(eb, ptr, i));
			else
				btrfs_dev_stat_set(device, i, 0);
		}

		device->dev_stats_valid = 1;
		btrfs_dev_stat_print_on_load(device);
		btrfs_release_path(path);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	btrfs_free_path(path);
	return ret < 0 ? ret : 0;
}

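/*
 * Write the in-memory stats of @device back to its dev_stats item, replacing
 * an undersized item from an older kernel if necessary.
 */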
static int update_dev_stat_item(struct btrfs_trans_handle *trans,
				struct btrfs_device *device)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *dev_root = fs_info->dev_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *eb;
	struct btrfs_dev_stats_item *ptr;
	int ret;
	int i;

	key.objectid = BTRFS_DEV_STATS_OBJECTID;
	key.type = BTRFS_PERSISTENT_ITEM_KEY;
	key.offset = device->devid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
	if (ret < 0) {
		btrfs_warn_in_rcu(fs_info,
			"error %d while searching for dev_stats item for device %s",
			      ret, rcu_str_deref(device->name));
		goto out;
	}

	if (ret == 0 &&
	    btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
		/* need to delete old one and insert a new one */
		ret = btrfs_del_item(trans, dev_root, path);
		if (ret != 0) {
			btrfs_warn_in_rcu(fs_info,
				"delete too small dev_stats item for device %s failed %d",
				      rcu_str_deref(device->name), ret);
			goto out;
		}
		ret = 1;
	}

	if (ret == 1) {
		/* need to insert a new item */
		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, dev_root, path,
					      &key, sizeof(*ptr));
		if (ret < 0) {
			btrfs_warn_in_rcu(fs_info,
				"insert dev_stats item for device %s failed %d",
				rcu_str_deref(device->name), ret);
			goto out;
		}
	}

	eb = path->nodes[0];
	ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
		btrfs_set_dev_stats_value(eb, ptr, i,
					  btrfs_dev_stat_read(device, i));
	btrfs_mark_buffer_dirty(eb);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * called from commit_transaction. Writes all changed device stats to disk.
 */
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	int stats_cnt;
	int ret = 0;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		stats_cnt = atomic_read(&device->dev_stats_ccnt);
		if (!device->dev_stats_valid || stats_cnt == 0)
			continue;

		/*
		 * There is a LOAD-LOAD control dependency between the value of
		 * dev_stats_ccnt and updating the on-disk values which requires
		 * reading the in-memory counters. Such control dependencies
		 * require explicit read memory barriers.
		 *
		 * This memory barrier pairs with smp_mb__before_atomic in
		 * btrfs_dev_stat_inc/btrfs_dev_stat_set and with the full
		 * barrier implied by atomic_xchg in
		 * btrfs_dev_stats_read_and_reset
		 */
		smp_rmb();

		ret = update_dev_stat_item(trans, device);
		if (!ret)
			atomic_sub(stats_cnt, &device->dev_stats_ccnt);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	return ret;
}

void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
{
	btrfs_dev_stat_inc(dev, index);
	btrfs_dev_stat_print_on_error(dev);
}

static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
{
	if (!dev->dev_stats_valid)
		return;
	btrfs_err_rl_in_rcu(dev->fs_info,
		"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
			   rcu_str_deref(dev->name),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
}

static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
{
	int i;

	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
		if (btrfs_dev_stat_read(dev, i) != 0)
			break;
	if (i == BTRFS_DEV_STAT_VALUES_MAX)
		return; /* all values == 0, suppress message */

	btrfs_info_in_rcu(dev->fs_info,
		"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
	       rcu_str_deref(dev->name),
	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
}

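/*
 * Copy a device's stats to userspace for the GET_DEV_STATS ioctl, optionally
 * resetting the counters when BTRFS_DEV_STATS_RESET is set in @stats->flags.
 */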
int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
			struct btrfs_ioctl_get_dev_stats *stats)
{
	struct btrfs_device *dev;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	int i;

	mutex_lock(&fs_devices->device_list_mutex);
	dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL,
				true);
	mutex_unlock(&fs_devices->device_list_mutex);

	if (!dev) {
		btrfs_warn(fs_info, "get dev_stats failed, device not found");
		return -ENODEV;
	} else if (!dev->dev_stats_valid) {
		btrfs_warn(fs_info, "get dev_stats failed, not yet valid");
		return -ENODEV;
	} else if (stats->flags & BTRFS_DEV_STATS_RESET) {
		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
			if (stats->nr_items > i)
				stats->values[i] =
					btrfs_dev_stat_read_and_reset(dev, i);
			else
				btrfs_dev_stat_set(dev, i, 0);
		}
		btrfs_info(fs_info, "device stats zeroed by %s (%d)",
			   current->comm, task_pid_nr(current));
	} else {
		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
			if (stats->nr_items > i)
				stats->values[i] = btrfs_dev_stat_read(dev, i);
	}
	if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
		stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
	return 0;
}

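/*
 * Wipe the magic from every superblock copy on @bdev so the device is no
 * longer recognized as btrfs, then notify udev and refresh the device path's
 * timestamps.
 */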
void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path)
{
	struct buffer_head *bh;
	struct btrfs_super_block *disk_super;
	int copy_num;

	if (!bdev)
		return;

	for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX;
		copy_num++) {

		if (btrfs_read_dev_one_super(bdev, copy_num, &bh))
			continue;

		disk_super = (struct btrfs_super_block *)bh->b_data;

		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
		set_buffer_dirty(bh);
		sync_dirty_buffer(bh);
		brelse(bh);
	}

	/* Notify udev that device has changed */
	btrfs_kobject_uevent(bdev, KOBJ_CHANGE);

	/* Update ctime/mtime for device path for libblkid */
	update_dev_time(device_path);
}

/*
 * Update the size and bytes used for each device where it changed.  This is
 * delayed since we would otherwise get errors while writing out the
 * superblocks.
 *
 * Must be invoked during transaction commit.
 */
void btrfs_commit_device_sizes(struct btrfs_transaction *trans)
{
	struct btrfs_device *curr, *next;

	ASSERT(trans->state == TRANS_STATE_COMMIT_DOING);

	if (list_empty(&trans->dev_update_list))
		return;

	/*
	 * We don't need the device_list_mutex here.  This list is owned by the
	 * transaction and the transaction must complete before the device is
	 * released.
	 */
	mutex_lock(&trans->fs_info->chunk_mutex);
	list_for_each_entry_safe(curr, next, &trans->dev_update_list,
				 post_commit_list) {
		list_del_init(&curr->post_commit_list);
		curr->commit_total_bytes = curr->disk_total_bytes;
		curr->commit_bytes_used = curr->bytes_used;
	}
	mutex_unlock(&trans->fs_info->chunk_mutex);
}

void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;

	while (fs_devices) {
		fs_devices->fs_info = fs_info;
		fs_devices = fs_devices->seed;
	}
}

void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;

	while (fs_devices) {
		fs_devices->fs_info = NULL;
		fs_devices = fs_devices->seed;
	}
}

/*
 * Multiplicity factor for simple profiles: DUP, RAID1-like and RAID10.
 */
int btrfs_bg_type_to_factor(u64 flags)
{
	const int index = btrfs_bg_flags_to_raid_index(flags);

	return btrfs_raid_array[index].ncopies;
}


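/*
 * Verify one dev extent against its chunk: the chunk must exist, the extent
 * length must equal the chunk's per-device stripe length, the (devid,
 * physical) pair must match one of the chunk's stripes, and the extent must
 * lie within the device.
 */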
static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
				 u64 chunk_offset, u64 devid,
				 u64 physical_offset, u64 physical_len)
{
	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
	struct extent_map *em;
	struct map_lookup *map;
	struct btrfs_device *dev;
	u64 stripe_len;
	bool found = false;
	int ret = 0;
	int i;

	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
	read_unlock(&em_tree->lock);

	if (!em) {
		btrfs_err(fs_info,
"dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
			  physical_offset, devid);
		ret = -EUCLEAN;
		goto out;
	}

	map = em->map_lookup;
	stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes);
	if (physical_len != stripe_len) {
		btrfs_err(fs_info,
"dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
			  physical_offset, devid, em->start, physical_len,
			  stripe_len);
		ret = -EUCLEAN;
		goto out;
	}

	for (i = 0; i < map->num_stripes; i++) {
		if (map->stripes[i].dev->devid == devid &&
		    map->stripes[i].physical == physical_offset) {
			found = true;
			if (map->verified_stripes >= map->num_stripes) {
				btrfs_err(fs_info,
				"too many dev extents for chunk %llu found",
					  em->start);
				ret = -EUCLEAN;
				goto out;
			}
			map->verified_stripes++;
			break;
		}
	}
	if (!found) {
		btrfs_err(fs_info,
	"dev extent physical offset %llu devid %llu has no corresponding chunk",
			physical_offset, devid);
		ret = -EUCLEAN;
	}

	/* Make sure no dev extent is beyond device boundary */
	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
	if (!dev) {
		btrfs_err(fs_info, "failed to find devid %llu", devid);
		ret = -EUCLEAN;
		goto out;
	}

	/* It's possible this device is a dummy for seed device */
	if (dev->disk_total_bytes == 0) {
		dev = btrfs_find_device(fs_info->fs_devices->seed, devid, NULL,
					NULL, false);
		if (!dev) {
			btrfs_err(fs_info, "failed to find seed devid %llu",
				  devid);
			ret = -EUCLEAN;
			goto out;
		}
	}

	if (physical_offset + physical_len > dev->disk_total_bytes) {
		btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
			  devid, physical_offset, physical_len,
			  dev->disk_total_bytes);
		ret = -EUCLEAN;
		goto out;
	}
out:
	free_extent_map(em);
	return ret;
}

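/*
 * After all dev extents have been checked, make sure every chunk saw exactly
 * as many dev extents as it has stripes.
 */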
static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
{
	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
	struct extent_map *em;
	struct rb_node *node;
	int ret = 0;

	read_lock(&em_tree->lock);
	for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
		em = rb_entry(node, struct extent_map, rb_node);
		if (em->map_lookup->num_stripes !=
		    em->map_lookup->verified_stripes) {
			btrfs_err(fs_info,
			"chunk %llu has missing dev extent, have %d expect %d",
				  em->start, em->map_lookup->verified_stripes,
				  em->map_lookup->num_stripes);
			ret = -EUCLEAN;
			goto out;
		}
	}
out:
	read_unlock(&em_tree->lock);
	return ret;
}

/*
 * Ensure that all dev extents are mapped to correct chunk, otherwise
 * later chunk allocation/free would cause unexpected behavior.
 *
 * NOTE: This will iterate through the whole device tree, which should be of
 * the same size level as the chunk tree.  This slightly increases mount time.
 */
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
{
	struct btrfs_path *path;
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_key key;
	u64 prev_devid = 0;
	u64 prev_dev_ext_end = 0;
	int ret = 0;

	key.objectid = 1;
	key.type = BTRFS_DEV_EXTENT_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_next_item(root, path);
		if (ret < 0)
			goto out;
		/* No dev extents at all? Not good */
		if (ret > 0) {
			ret = -EUCLEAN;
			goto out;
		}
	}
	while (1) {
		struct extent_buffer *leaf = path->nodes[0];
		struct btrfs_dev_extent *dext;
		int slot = path->slots[0];
		u64 chunk_offset;
		u64 physical_offset;
		u64 physical_len;
		u64 devid;

		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.type != BTRFS_DEV_EXTENT_KEY)
			break;
		devid = key.objectid;
		physical_offset = key.offset;

		dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
		chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext);
		physical_len = btrfs_dev_extent_length(leaf, dext);

		/* Check if this dev extent overlaps with the previous one */
		if (devid == prev_devid && physical_offset < prev_dev_ext_end) {
			btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu",
				  devid, physical_offset, prev_dev_ext_end);
			ret = -EUCLEAN;
			goto out;
		}

		ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
					    physical_offset, physical_len);
		if (ret < 0)
			goto out;
		prev_devid = devid;
		prev_dev_ext_end = physical_offset + physical_len;

		ret = btrfs_next_item(root, path);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			ret = 0;
			break;
		}
	}

	/* Ensure all chunks have corresponding dev extents */
	ret = verify_chunk_dev_extent_mapping(fs_info);
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Check whether the given block group or device is pinned by any inode being
 * used as a swapfile.
 */
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr)
{
	struct btrfs_swapfile_pin *sp;
	struct rb_node *node;

	spin_lock(&fs_info->swapfile_pins_lock);
	node = fs_info->swapfile_pins.rb_node;
	while (node) {
		sp = rb_entry(node, struct btrfs_swapfile_pin, node);
		if (ptr < sp->ptr)
			node = node->rb_left;
		else if (ptr > sp->ptr)
			node = node->rb_right;
		else
			break;
	}
	spin_unlock(&fs_info->swapfile_pins_lock);
	return node != NULL;
}