This source file includes the following definitions.
- speed_min
- speed_max
- rdev_init_wb
- mddev_create_wb_pool
- mddev_destroy_wb_pool
- bio_alloc_mddev
- md_bio_alloc_sync
- md_new_event
- is_suspended
- md_handle_request
- md_make_request
- mddev_suspend
- mddev_resume
- mddev_congested
- md_congested
- md_end_flush
- submit_flushes
- md_submit_flush_data
- md_flush_request
- mddev_get
- mddev_put
- mddev_init
- mddev_find
- mddev_unlock
- md_find_rdev_nr_rcu
- find_rdev
- md_find_rdev_rcu
- find_pers
- calc_dev_sboffset
- alloc_disk_sb
- md_rdev_clear
- super_written
- md_super_write
- md_super_wait
- sync_page_io
- read_disk_sb
- md_uuid_equal
- md_sb_equal
- md_csum_fold
- calc_sb_csum
- md_check_no_bitmap
- super_90_load
- super_90_validate
- super_90_sync
- super_90_rdev_size_change
- super_90_allow_new_offset
- calc_sb_1_csum
- super_1_load
- super_1_validate
- super_1_sync
- super_1_rdev_size_change
- super_1_allow_new_offset
- sync_super
- match_mddev_units
- md_integrity_register
- md_integrity_add_rdev
- bind_rdev_to_array
- md_delayed_delete
- unbind_rdev_from_array
- lock_rdev
- unlock_rdev
- export_rdev
- md_kick_rdev_from_array
- export_array
- set_in_sync
- sync_sbs
- does_sb_need_changing
- md_update_sb
- add_bound_rdev
- cmd_match
- state_show
- state_store
- errors_show
- errors_store
- slot_show
- slot_store
- offset_show
- offset_store
- new_offset_show
- new_offset_store
- rdev_size_show
- overlaps
- strict_blocks_to_sectors
- rdev_size_store
- recovery_start_show
- recovery_start_store
- bb_show
- bb_store
- ubb_show
- ubb_store
- ppl_sector_show
- ppl_sector_store
- ppl_size_show
- ppl_size_store
- rdev_attr_show
- rdev_attr_store
- rdev_free
- md_rdev_init
- md_import_device
- analyze_sbs
- strict_strtoul_scaled
- safe_delay_show
- safe_delay_store
- level_show
- level_store
- layout_show
- layout_store
- raid_disks_show
- raid_disks_store
- chunk_size_show
- chunk_size_store
- resync_start_show
- resync_start_store
- match_word
- array_state_show
- array_state_store
- max_corrected_read_errors_show
- max_corrected_read_errors_store
- null_show
- new_dev_store
- bitmap_store
- size_show
- size_store
- metadata_show
- metadata_store
- action_show
- action_store
- last_sync_action_show
- mismatch_cnt_show
- sync_min_show
- sync_min_store
- sync_max_show
- sync_max_store
- degraded_show
- sync_force_parallel_show
- sync_force_parallel_store
- sync_speed_show
- sync_completed_show
- min_sync_show
- min_sync_store
- max_sync_show
- max_sync_store
- suspend_lo_show
- suspend_lo_store
- suspend_hi_show
- suspend_hi_store
- reshape_position_show
- reshape_position_store
- reshape_direction_show
- reshape_direction_store
- array_size_show
- array_size_store
- consistency_policy_show
- consistency_policy_store
- fail_last_dev_show
- fail_last_dev_store
- md_attr_show
- md_attr_store
- md_free
- mddev_delayed_delete
- no_op
- mddev_init_writes_pending
- md_alloc
- md_probe
- add_named_array
- md_safemode_timeout
- md_run
- do_md_run
- md_start
- restart_array
- md_clean
- __md_stop_writes
- md_stop_writes
- mddev_detach
- __md_stop
- md_stop
- md_set_readonly
- do_md_stop
- autorun_array
- autorun_devices
- get_version
- get_array_info
- get_bitmap_file
- get_disk_info
- add_new_disk
- hot_remove_disk
- hot_add_disk
- set_bitmap_file
- set_array_info
- md_set_array_sectors
- update_size
- update_raid_disks
- update_array_info
- set_disk_faulty
- md_getgeo
- md_ioctl_valid
- md_ioctl
- md_compat_ioctl
- md_open
- md_release
- md_media_changed
- md_revalidate
- md_thread
- md_wakeup_thread
- md_register_thread
- md_unregister_thread
- md_error
- status_unused
- status_resync
- md_seq_start
- md_seq_next
- md_seq_stop
- md_seq_show
- md_seq_open
- mdstat_poll
- register_md_personality
- unregister_md_personality
- register_md_cluster_operations
- unregister_md_cluster_operations
- md_setup_cluster
- md_cluster_stop
- is_mddev_idle
- md_done_sync
- md_write_start
- md_write_inc
- md_write_end
- md_allow_write
- md_do_sync
- remove_and_add_spares
- md_start_sync
- md_check_recovery
- md_reap_sync_thread
- md_wait_for_blocked_rdev
- md_finish_reshape
- rdev_set_badblocks
- rdev_clear_badblocks
- md_notify_reboot
- md_geninit
- md_init
- check_sb_changes
- read_rdev
- md_reload_sb
- md_autodetect_dev
- autostart_arrays
- md_exit
- module_exit
- set_ro
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40 #include <linux/sched/mm.h>
41 #include <linux/sched/signal.h>
42 #include <linux/kthread.h>
43 #include <linux/blkdev.h>
44 #include <linux/badblocks.h>
45 #include <linux/sysctl.h>
46 #include <linux/seq_file.h>
47 #include <linux/fs.h>
48 #include <linux/poll.h>
49 #include <linux/ctype.h>
50 #include <linux/string.h>
51 #include <linux/hdreg.h>
52 #include <linux/proc_fs.h>
53 #include <linux/random.h>
54 #include <linux/module.h>
55 #include <linux/reboot.h>
56 #include <linux/file.h>
57 #include <linux/compat.h>
58 #include <linux/delay.h>
59 #include <linux/raid/md_p.h>
60 #include <linux/raid/md_u.h>
61 #include <linux/slab.h>
62 #include <linux/percpu-refcount.h>
63
64 #include <trace/events/block.h>
65 #include "md.h"
66 #include "md-bitmap.h"
67 #include "md-cluster.h"
68
69 #ifndef MODULE
70 static void autostart_arrays(int part);
71 #endif
72
73
74
75
76
77
78 static LIST_HEAD(pers_list);
79 static DEFINE_SPINLOCK(pers_lock);
80
81 static struct kobj_type md_ktype;
82
83 struct md_cluster_operations *md_cluster_ops;
84 EXPORT_SYMBOL(md_cluster_ops);
85 static struct module *md_cluster_mod;
86
87 static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
88 static struct workqueue_struct *md_wq;
89 static struct workqueue_struct *md_misc_wq;
90
91 static int remove_and_add_spares(struct mddev *mddev,
92 struct md_rdev *this);
93 static void mddev_detach(struct mddev *mddev);
94
95
96
97
98
99
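/*
 * Default limit on corrected read errors per member device; the
 * per-array value can be adjusted through the
 * max_corrected_read_errors sysfs attribute listed above.
 */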
100 #define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
101
102
103
104
105
106
107
108
109
110
111
112
113
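/*
 * Resync/recovery speed throttling: sysctl_speed_limit_{min,max} are
 * the system-wide defaults; speed_min()/speed_max() return the
 * per-array sync_speed_{min,max} override when one has been set.
 */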
114 static int sysctl_speed_limit_min = 1000;
115 static int sysctl_speed_limit_max = 200000;
116 static inline int speed_min(struct mddev *mddev)
117 {
118 return mddev->sync_speed_min ?
119 mddev->sync_speed_min : sysctl_speed_limit_min;
120 }
121
122 static inline int speed_max(struct mddev *mddev)
123 {
124 return mddev->sync_speed_max ?
125 mddev->sync_speed_max : sysctl_speed_limit_max;
126 }
127
128 static int rdev_init_wb(struct md_rdev *rdev)
129 {
130 if (rdev->bdev->bd_queue->nr_hw_queues == 1)
131 return 0;
132
133 spin_lock_init(&rdev->wb_list_lock);
134 INIT_LIST_HEAD(&rdev->wb_list);
135 init_waitqueue_head(&rdev->wb_io_wait);
136 set_bit(WBCollisionCheck, &rdev->flags);
137
138 return 1;
139 }
140
141
142
143
144
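/*
 * Create the write-behind info mempool when a WriteMostly rdev on a
 * multi-queue device is added (rdev_init_wb() returns 0 for
 * single-queue devices). The array is suspended around the allocation
 * unless the caller already holds it suspended.
 */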
145 void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev,
146 bool is_suspend)
147 {
148 if (mddev->bitmap_info.max_write_behind == 0)
149 return;
150
151 if (!test_bit(WriteMostly, &rdev->flags) || !rdev_init_wb(rdev))
152 return;
153
154 if (mddev->wb_info_pool == NULL) {
155 unsigned int noio_flag;
156
157 if (!is_suspend)
158 mddev_suspend(mddev);
159 noio_flag = memalloc_noio_save();
160 mddev->wb_info_pool = mempool_create_kmalloc_pool(NR_WB_INFOS,
161 sizeof(struct wb_info));
162 memalloc_noio_restore(noio_flag);
163 if (!mddev->wb_info_pool)
164 pr_err("can't alloc memory pool for writemostly\n");
165 if (!is_suspend)
166 mddev_resume(mddev);
167 }
168 }
169 EXPORT_SYMBOL_GPL(mddev_create_wb_pool);
170
171
172
173
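/*
 * Tear down the write-behind mempool when the last rdev with
 * WBCollisionCheck set is removed; the array is suspended while the
 * pool is destroyed.
 */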
174 static void mddev_destroy_wb_pool(struct mddev *mddev, struct md_rdev *rdev)
175 {
176 if (!test_and_clear_bit(WBCollisionCheck, &rdev->flags))
177 return;
178
179 if (mddev->wb_info_pool) {
180 struct md_rdev *temp;
181 int num = 0;
182
183
184
185
186 rdev_for_each(temp, mddev)
187 if (temp != rdev &&
188 test_bit(WBCollisionCheck, &temp->flags))
189 num++;
190 if (!num) {
191 mddev_suspend(rdev->mddev);
192 mempool_destroy(mddev->wb_info_pool);
193 mddev->wb_info_pool = NULL;
194 mddev_resume(rdev->mddev);
195 }
196 }
197 }
198
199 static struct ctl_table_header *raid_table_header;
200
201 static struct ctl_table raid_table[] = {
202 {
203 .procname = "speed_limit_min",
204 .data = &sysctl_speed_limit_min,
205 .maxlen = sizeof(int),
206 .mode = S_IRUGO|S_IWUSR,
207 .proc_handler = proc_dointvec,
208 },
209 {
210 .procname = "speed_limit_max",
211 .data = &sysctl_speed_limit_max,
212 .maxlen = sizeof(int),
213 .mode = S_IRUGO|S_IWUSR,
214 .proc_handler = proc_dointvec,
215 },
216 { }
217 };
218
219 static struct ctl_table raid_dir_table[] = {
220 {
221 .procname = "raid",
222 .maxlen = 0,
223 .mode = S_IRUGO|S_IXUGO,
224 .child = raid_table,
225 },
226 { }
227 };
228
229 static struct ctl_table raid_root_table[] = {
230 {
231 .procname = "dev",
232 .maxlen = 0,
233 .mode = 0555,
234 .child = raid_dir_table,
235 },
236 { }
237 };
238
239 static const struct block_device_operations md_fops;
240
241 static int start_readonly;
242
243
244
245
246
247
248
249
250
251 static bool create_on_open = true;
252
253 struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
254 struct mddev *mddev)
255 {
256 if (!mddev || !bioset_initialized(&mddev->bio_set))
257 return bio_alloc(gfp_mask, nr_iovecs);
258
259 return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
260 }
261 EXPORT_SYMBOL_GPL(bio_alloc_mddev);
262
263 static struct bio *md_bio_alloc_sync(struct mddev *mddev)
264 {
265 if (!mddev || !bioset_initialized(&mddev->sync_set))
266 return bio_alloc(GFP_NOIO, 1);
267
268 return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
269 }
270
271
272
273
274
275
276
277
278
279
280
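/*
 * System-wide event counter, bumped on any change to md state.
 * Waiters (e.g. pollers of the mdstat seq file, see mdstat_poll) are
 * woken so they can re-read the status.
 */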
281 static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
282 static atomic_t md_event_count;
283 void md_new_event(struct mddev *mddev)
284 {
285 atomic_inc(&md_event_count);
286 wake_up(&md_event_waiters);
287 }
288 EXPORT_SYMBOL_GPL(md_new_event);
289
290
291
292
293
294 static LIST_HEAD(all_mddevs);
295 static DEFINE_SPINLOCK(all_mddevs_lock);
296
297
298
299
300
301
302
303
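/*
 * Iterate over all known arrays. A reference is taken on the current
 * mddev while the loop body runs and dropped (possibly freeing it)
 * before moving on, so the body may sleep; all_mddevs_lock is only
 * held while stepping to the next entry.
 */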
304 #define for_each_mddev(_mddev,_tmp) \
305 \
306 for (({ spin_lock(&all_mddevs_lock); \
307 _tmp = all_mddevs.next; \
308 _mddev = NULL;}); \
309 ({ if (_tmp != &all_mddevs) \
310 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
311 spin_unlock(&all_mddevs_lock); \
312 if (_mddev) mddev_put(_mddev); \
313 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
314 _tmp != &all_mddevs;}); \
315 ({ spin_lock(&all_mddevs_lock); \
316 _tmp = _tmp->next;}) \
317 )
318
319
320
321
322
323
324
325
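/*
 * A bio must wait if the whole array is suspended, or if it is a
 * write overlapping the suspended range [suspend_lo, suspend_hi).
 */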
326 static bool is_suspended(struct mddev *mddev, struct bio *bio)
327 {
328 if (mddev->suspended)
329 return true;
330 if (bio_data_dir(bio) != WRITE)
331 return false;
332 if (mddev->suspend_lo >= mddev->suspend_hi)
333 return false;
334 if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
335 return false;
336 if (bio_end_sector(bio) < mddev->suspend_lo)
337 return false;
338 return true;
339 }
340
341 void md_handle_request(struct mddev *mddev, struct bio *bio)
342 {
343 check_suspended:
344 rcu_read_lock();
345 if (is_suspended(mddev, bio)) {
346 DEFINE_WAIT(__wait);
347 for (;;) {
348 prepare_to_wait(&mddev->sb_wait, &__wait,
349 TASK_UNINTERRUPTIBLE);
350 if (!is_suspended(mddev, bio))
351 break;
352 rcu_read_unlock();
353 schedule();
354 rcu_read_lock();
355 }
356 finish_wait(&mddev->sb_wait, &__wait);
357 }
358 atomic_inc(&mddev->active_io);
359 rcu_read_unlock();
360
361 if (!mddev->pers->make_request(mddev, bio)) {
362 atomic_dec(&mddev->active_io);
363 wake_up(&mddev->sb_wait);
364 goto check_suspended;
365 }
366
367 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
368 wake_up(&mddev->sb_wait);
369 }
370 EXPORT_SYMBOL(md_handle_request);
371
372 static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
373 {
374 const int rw = bio_data_dir(bio);
375 const int sgrp = op_stat_group(bio_op(bio));
376 struct mddev *mddev = q->queuedata;
377 unsigned int sectors;
378
379 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
380 bio_io_error(bio);
381 return BLK_QC_T_NONE;
382 }
383
384 blk_queue_split(q, &bio);
385
386 if (mddev == NULL || mddev->pers == NULL) {
387 bio_io_error(bio);
388 return BLK_QC_T_NONE;
389 }
390 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
391 if (bio_sectors(bio) != 0)
392 bio->bi_status = BLK_STS_IOERR;
393 bio_endio(bio);
394 return BLK_QC_T_NONE;
395 }
396
397
398
399
400
401 sectors = bio_sectors(bio);
402
403 bio->bi_opf &= ~REQ_NOMERGE;
404
405 md_handle_request(mddev, bio);
406
407 part_stat_lock();
408 part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
409 part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
410 part_stat_unlock();
411
412 return BLK_QC_T_NONE;
413 }
414
415
416
417
418
419
420
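/*
 * Block new I/O, wait for all in-flight I/O to drain, then ask the
 * personality to quiesce. Calls nest: only the first call does the
 * work, and mddev_resume() undoes it when the count drops back.
 */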
421 void mddev_suspend(struct mddev *mddev)
422 {
423 WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
424 lockdep_assert_held(&mddev->reconfig_mutex);
425 if (mddev->suspended++)
426 return;
427 synchronize_rcu();
428 wake_up(&mddev->sb_wait);
429 set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
430 smp_mb__after_atomic();
431 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
432 mddev->pers->quiesce(mddev, 1);
433 clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
434 wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
435
436 del_timer_sync(&mddev->safemode_timer);
437 }
438 EXPORT_SYMBOL_GPL(mddev_suspend);
439
440 void mddev_resume(struct mddev *mddev)
441 {
442 lockdep_assert_held(&mddev->reconfig_mutex);
443 if (--mddev->suspended)
444 return;
445 wake_up(&mddev->sb_wait);
446 mddev->pers->quiesce(mddev, 0);
447
448 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
449 md_wakeup_thread(mddev->thread);
450 md_wakeup_thread(mddev->sync_thread);
451 }
452 EXPORT_SYMBOL_GPL(mddev_resume);
453
454 int mddev_congested(struct mddev *mddev, int bits)
455 {
456 struct md_personality *pers = mddev->pers;
457 int ret = 0;
458
459 rcu_read_lock();
460 if (mddev->suspended)
461 ret = 1;
462 else if (pers && pers->congested)
463 ret = pers->congested(mddev, bits);
464 rcu_read_unlock();
465 return ret;
466 }
467 EXPORT_SYMBOL_GPL(mddev_congested);
468 static int md_congested(void *data, int bits)
469 {
470 struct mddev *mddev = data;
471 return mddev_congested(mddev, bits);
472 }
473
474
475
476
477
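/*
 * Flush (REQ_PREFLUSH) handling for md: a flush is cloned to every
 * active member device, and the data portion, if any, is submitted
 * once all of those flushes complete.
 */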
478 static void md_end_flush(struct bio *bio)
479 {
480 struct md_rdev *rdev = bio->bi_private;
481 struct mddev *mddev = rdev->mddev;
482
483 rdev_dec_pending(rdev, mddev);
484
485 if (atomic_dec_and_test(&mddev->flush_pending)) {
486
487 queue_work(md_wq, &mddev->flush_work);
488 }
489 bio_put(bio);
490 }
491
492 static void md_submit_flush_data(struct work_struct *ws);
493
494 static void submit_flushes(struct work_struct *ws)
495 {
496 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
497 struct md_rdev *rdev;
498
499 mddev->start_flush = ktime_get_boottime();
500 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
501 atomic_set(&mddev->flush_pending, 1);
502 rcu_read_lock();
503 rdev_for_each_rcu(rdev, mddev)
504 if (rdev->raid_disk >= 0 &&
505 !test_bit(Faulty, &rdev->flags)) {
506
507
508
509
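/*
 * Take two references on the rdev: one is dropped when the flush
 * bio completes (md_end_flush), the other via rdev_dec_pending
 * after rcu_read_lock is re-acquired below.
 */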
510 struct bio *bi;
511 atomic_inc(&rdev->nr_pending);
512 atomic_inc(&rdev->nr_pending);
513 rcu_read_unlock();
514 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
515 bi->bi_end_io = md_end_flush;
516 bi->bi_private = rdev;
517 bio_set_dev(bi, rdev->bdev);
518 bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
519 atomic_inc(&mddev->flush_pending);
520 submit_bio(bi);
521 rcu_read_lock();
522 rdev_dec_pending(rdev, mddev);
523 }
524 rcu_read_unlock();
525 if (atomic_dec_and_test(&mddev->flush_pending))
526 queue_work(md_wq, &mddev->flush_work);
527 }
528
529 static void md_submit_flush_data(struct work_struct *ws)
530 {
531 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
532 struct bio *bio = mddev->flush_bio;
533
534
535
536
537
538
539
540 mddev->last_flush = mddev->start_flush;
541 mddev->flush_bio = NULL;
542 wake_up(&mddev->sb_wait);
543
544 if (bio->bi_iter.bi_size == 0) {
545
546 bio_endio(bio);
547 } else {
548 bio->bi_opf &= ~REQ_PREFLUSH;
549 md_handle_request(mddev, bio);
550 }
551 }
552
553
554
555
556
557
558
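/*
 * Returns true if the flush was handled here (queued to the flush
 * machinery, or already covered by a flush that started after this
 * request arrived and carried no data). Returns false if the caller
 * must still process the bio's data portion, with REQ_PREFLUSH
 * cleared.
 */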
559 bool md_flush_request(struct mddev *mddev, struct bio *bio)
560 {
561 ktime_t start = ktime_get_boottime();
562 spin_lock_irq(&mddev->lock);
563 wait_event_lock_irq(mddev->sb_wait,
564 !mddev->flush_bio ||
565 ktime_after(mddev->last_flush, start),
566 mddev->lock);
567 if (!ktime_after(mddev->last_flush, start)) {
568 WARN_ON(mddev->flush_bio);
569 mddev->flush_bio = bio;
570 bio = NULL;
571 }
572 spin_unlock_irq(&mddev->lock);
573
574 if (!bio) {
575 INIT_WORK(&mddev->flush_work, submit_flushes);
576 queue_work(md_wq, &mddev->flush_work);
577 } else {
578
579 if (bio->bi_iter.bi_size == 0)
580
581 bio_endio(bio);
582 else {
583 bio->bi_opf &= ~REQ_PREFLUSH;
584 return false;
585 }
586 }
587 return true;
588 }
589 EXPORT_SYMBOL(md_flush_request);
590
591 static inline struct mddev *mddev_get(struct mddev *mddev)
592 {
593 atomic_inc(&mddev->active);
594 return mddev;
595 }
596
597 static void mddev_delayed_delete(struct work_struct *ws);
598
599 static void mddev_put(struct mddev *mddev)
600 {
601 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
602 return;
603 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
604 mddev->ctime == 0 && !mddev->hold_active) {
605
606
607 list_del_init(&mddev->all_mddevs);
608
609
610
611
612
613
614 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
615 queue_work(md_misc_wq, &mddev->del_work);
616 }
617 spin_unlock(&all_mddevs_lock);
618 }
619
620 static void md_safemode_timeout(struct timer_list *t);
621
622 void mddev_init(struct mddev *mddev)
623 {
624 kobject_init(&mddev->kobj, &md_ktype);
625 mutex_init(&mddev->open_mutex);
626 mutex_init(&mddev->reconfig_mutex);
627 mutex_init(&mddev->bitmap_info.mutex);
628 INIT_LIST_HEAD(&mddev->disks);
629 INIT_LIST_HEAD(&mddev->all_mddevs);
630 timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
631 atomic_set(&mddev->active, 1);
632 atomic_set(&mddev->openers, 0);
633 atomic_set(&mddev->active_io, 0);
634 spin_lock_init(&mddev->lock);
635 atomic_set(&mddev->flush_pending, 0);
636 init_waitqueue_head(&mddev->sb_wait);
637 init_waitqueue_head(&mddev->recovery_wait);
638 mddev->reshape_position = MaxSector;
639 mddev->reshape_backwards = 0;
640 mddev->last_sync_action = "none";
641 mddev->resync_min = 0;
642 mddev->resync_max = MaxSector;
643 mddev->level = LEVEL_NONE;
644 }
645 EXPORT_SYMBOL_GPL(mddev_init);
646
647 static struct mddev *mddev_find(dev_t unit)
648 {
649 struct mddev *mddev, *new = NULL;
650
651 if (unit && MAJOR(unit) != MD_MAJOR)
652 unit &= ~((1<<MdpMinorShift)-1);
653
654 retry:
655 spin_lock(&all_mddevs_lock);
656
657 if (unit) {
658 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
659 if (mddev->unit == unit) {
660 mddev_get(mddev);
661 spin_unlock(&all_mddevs_lock);
662 kfree(new);
663 return mddev;
664 }
665
666 if (new) {
667 list_add(&new->all_mddevs, &all_mddevs);
668 spin_unlock(&all_mddevs_lock);
669 new->hold_active = UNTIL_IOCTL;
670 return new;
671 }
672 } else if (new) {
673
674 static int next_minor = 512;
675 int start = next_minor;
676 int is_free = 0;
677 int dev = 0;
678 while (!is_free) {
679 dev = MKDEV(MD_MAJOR, next_minor);
680 next_minor++;
681 if (next_minor > MINORMASK)
682 next_minor = 0;
683 if (next_minor == start) {
684
685 spin_unlock(&all_mddevs_lock);
686 kfree(new);
687 return NULL;
688 }
689
690 is_free = 1;
691 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
692 if (mddev->unit == dev) {
693 is_free = 0;
694 break;
695 }
696 }
697 new->unit = dev;
698 new->md_minor = MINOR(dev);
699 new->hold_active = UNTIL_STOP;
700 list_add(&new->all_mddevs, &all_mddevs);
701 spin_unlock(&all_mddevs_lock);
702 return new;
703 }
704 spin_unlock(&all_mddevs_lock);
705
706 new = kzalloc(sizeof(*new), GFP_KERNEL);
707 if (!new)
708 return NULL;
709
710 new->unit = unit;
711 if (MAJOR(unit) == MD_MAJOR)
712 new->md_minor = MINOR(unit);
713 else
714 new->md_minor = MINOR(unit) >> MdpMinorShift;
715
716 mddev_init(new);
717
718 goto retry;
719 }
720
721 static struct attribute_group md_redundancy_group;
722
723 void mddev_unlock(struct mddev *mddev)
724 {
725 if (mddev->to_remove) {
726
727
728
729
730
731
732
733
734
735
736
737
738 struct attribute_group *to_remove = mddev->to_remove;
739 mddev->to_remove = NULL;
740 mddev->sysfs_active = 1;
741 mutex_unlock(&mddev->reconfig_mutex);
742
743 if (mddev->kobj.sd) {
744 if (to_remove != &md_redundancy_group)
745 sysfs_remove_group(&mddev->kobj, to_remove);
746 if (mddev->pers == NULL ||
747 mddev->pers->sync_request == NULL) {
748 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
749 if (mddev->sysfs_action)
750 sysfs_put(mddev->sysfs_action);
751 mddev->sysfs_action = NULL;
752 }
753 }
754 mddev->sysfs_active = 0;
755 } else
756 mutex_unlock(&mddev->reconfig_mutex);
757
758
759
760
761 spin_lock(&pers_lock);
762 md_wakeup_thread(mddev->thread);
763 wake_up(&mddev->sb_wait);
764 spin_unlock(&pers_lock);
765 }
766 EXPORT_SYMBOL_GPL(mddev_unlock);
767
768 struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
769 {
770 struct md_rdev *rdev;
771
772 rdev_for_each_rcu(rdev, mddev)
773 if (rdev->desc_nr == nr)
774 return rdev;
775
776 return NULL;
777 }
778 EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);
779
780 static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
781 {
782 struct md_rdev *rdev;
783
784 rdev_for_each(rdev, mddev)
785 if (rdev->bdev->bd_dev == dev)
786 return rdev;
787
788 return NULL;
789 }
790
791 struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
792 {
793 struct md_rdev *rdev;
794
795 rdev_for_each_rcu(rdev, mddev)
796 if (rdev->bdev->bd_dev == dev)
797 return rdev;
798
799 return NULL;
800 }
801 EXPORT_SYMBOL_GPL(md_find_rdev_rcu);
802
803 static struct md_personality *find_pers(int level, char *clevel)
804 {
805 struct md_personality *pers;
806 list_for_each_entry(pers, &pers_list, list) {
807 if (level != LEVEL_NONE && pers->level == level)
808 return pers;
809 if (strcmp(pers->name, clevel)==0)
810 return pers;
811 }
812 return NULL;
813 }
814
815
816 static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
817 {
818 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
819 return MD_NEW_SIZE_SECTORS(num_sectors);
820 }
821
822 static int alloc_disk_sb(struct md_rdev *rdev)
823 {
824 rdev->sb_page = alloc_page(GFP_KERNEL);
825 if (!rdev->sb_page)
826 return -ENOMEM;
827 return 0;
828 }
829
830 void md_rdev_clear(struct md_rdev *rdev)
831 {
832 if (rdev->sb_page) {
833 put_page(rdev->sb_page);
834 rdev->sb_loaded = 0;
835 rdev->sb_page = NULL;
836 rdev->sb_start = 0;
837 rdev->sectors = 0;
838 }
839 if (rdev->bb_page) {
840 put_page(rdev->bb_page);
841 rdev->bb_page = NULL;
842 }
843 badblocks_exit(&rdev->badblocks);
844 }
845 EXPORT_SYMBOL_GPL(md_rdev_clear);
846
847 static void super_written(struct bio *bio)
848 {
849 struct md_rdev *rdev = bio->bi_private;
850 struct mddev *mddev = rdev->mddev;
851
852 if (bio->bi_status) {
853 pr_err("md: super_written gets error=%d\n", bio->bi_status);
854 md_error(mddev, rdev);
855 if (!test_bit(Faulty, &rdev->flags)
856 && (bio->bi_opf & MD_FAILFAST)) {
857 set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
858 set_bit(LastDev, &rdev->flags);
859 }
860 } else
861 clear_bit(LastDev, &rdev->flags);
862
863 if (atomic_dec_and_test(&mddev->pending_writes))
864 wake_up(&mddev->sb_wait);
865 rdev_dec_pending(rdev, mddev);
866 bio_put(bio);
867 }
868
869 void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
870 sector_t sector, int size, struct page *page)
871 {
872
873
874
875
876
877
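/*
 * Write the first 'size' bytes of 'page' to 'sector' of the rdev's
 * (meta)device. mddev->pending_writes is incremented before
 * submission and dropped in super_written(), which wakes sb_wait;
 * callers wait for completion via md_super_wait().
 */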
878 struct bio *bio;
879 int ff = 0;
880
881 if (!page)
882 return;
883
884 if (test_bit(Faulty, &rdev->flags))
885 return;
886
887 bio = md_bio_alloc_sync(mddev);
888
889 atomic_inc(&rdev->nr_pending);
890
891 bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
892 bio->bi_iter.bi_sector = sector;
893 bio_add_page(bio, page, size, 0);
894 bio->bi_private = rdev;
895 bio->bi_end_io = super_written;
896
897 if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
898 test_bit(FailFast, &rdev->flags) &&
899 !test_bit(LastDev, &rdev->flags))
900 ff = MD_FAILFAST;
901 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;
902
903 atomic_inc(&mddev->pending_writes);
904 submit_bio(bio);
905 }
906
907 int md_super_wait(struct mddev *mddev)
908 {
909
910 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
911 if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
912 return -EAGAIN;
913 return 0;
914 }
915
916 int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
917 struct page *page, int op, int op_flags, bool metadata_op)
918 {
919 struct bio *bio = md_bio_alloc_sync(rdev->mddev);
920 int ret;
921
922 if (metadata_op && rdev->meta_bdev)
923 bio_set_dev(bio, rdev->meta_bdev);
924 else
925 bio_set_dev(bio, rdev->bdev);
926 bio_set_op_attrs(bio, op, op_flags);
927 if (metadata_op)
928 bio->bi_iter.bi_sector = sector + rdev->sb_start;
929 else if (rdev->mddev->reshape_position != MaxSector &&
930 (rdev->mddev->reshape_backwards ==
931 (sector >= rdev->mddev->reshape_position)))
932 bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
933 else
934 bio->bi_iter.bi_sector = sector + rdev->data_offset;
935 bio_add_page(bio, page, size, 0);
936
937 submit_bio_wait(bio);
938
939 ret = !bio->bi_status;
940 bio_put(bio);
941 return ret;
942 }
943 EXPORT_SYMBOL_GPL(sync_page_io);
944
945 static int read_disk_sb(struct md_rdev *rdev, int size)
946 {
947 char b[BDEVNAME_SIZE];
948
949 if (rdev->sb_loaded)
950 return 0;
951
952 if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
953 goto fail;
954 rdev->sb_loaded = 1;
955 return 0;
956
957 fail:
958 pr_err("md: disabled device %s, could not read superblock.\n",
959 bdevname(rdev->bdev,b));
960 return -EINVAL;
961 }
962
963 static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
964 {
965 return sb1->set_uuid0 == sb2->set_uuid0 &&
966 sb1->set_uuid1 == sb2->set_uuid1 &&
967 sb1->set_uuid2 == sb2->set_uuid2 &&
968 sb1->set_uuid3 == sb2->set_uuid3;
969 }
970
971 static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
972 {
973 int ret;
974 mdp_super_t *tmp1, *tmp2;
975
976 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
977 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
978
979 if (!tmp1 || !tmp2) {
980 ret = 0;
981 goto abort;
982 }
983
984 *tmp1 = *sb1;
985 *tmp2 = *sb2;
986
987
988
989
990 tmp1->nr_disks = 0;
991 tmp2->nr_disks = 0;
992
993 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
994 abort:
995 kfree(tmp1);
996 kfree(tmp2);
997 return ret;
998 }
999
1000 static u32 md_csum_fold(u32 csum)
1001 {
1002 csum = (csum & 0xffff) + (csum >> 16);
1003 return (csum & 0xffff) + (csum >> 16);
1004 }
1005
1006 static unsigned int calc_sb_csum(mdp_super_t *sb)
1007 {
1008 u64 newcsum = 0;
1009 u32 *sb32 = (u32*)sb;
1010 int i;
1011 unsigned int disk_csum, csum;
1012
1013 disk_csum = sb->sb_csum;
1014 sb->sb_csum = 0;
1015
1016 for (i = 0; i < MD_SB_BYTES/4 ; i++)
1017 newcsum += sb32[i];
1018 csum = (newcsum & 0xffffffff) + (newcsum>>32);
1019
1020 #ifdef CONFIG_ALPHA
1021
1022
1023
1024
1025
1026
1027
1028
1029 sb->sb_csum = md_csum_fold(disk_csum);
1030 #else
1031 sb->sb_csum = disk_csum;
1032 #endif
1033 return csum;
1034 }
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
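/*
 * Per-superblock-format method table: loading and validating a
 * superblock from an rdev, writing the in-core state back out,
 * resizing the device, and checking whether a new data offset is
 * acceptable.
 */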
1066 struct super_type {
1067 char *name;
1068 struct module *owner;
1069 int (*load_super)(struct md_rdev *rdev,
1070 struct md_rdev *refdev,
1071 int minor_version);
1072 int (*validate_super)(struct mddev *mddev,
1073 struct md_rdev *rdev);
1074 void (*sync_super)(struct mddev *mddev,
1075 struct md_rdev *rdev);
1076 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
1077 sector_t num_sectors);
1078 int (*allow_new_offset)(struct md_rdev *rdev,
1079 unsigned long long new_offset);
1080 };
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090 int md_check_no_bitmap(struct mddev *mddev)
1091 {
1092 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1093 return 0;
1094 pr_warn("%s: bitmaps are not supported for %s\n",
1095 mdname(mddev), mddev->pers->name);
1096 return 1;
1097 }
1098 EXPORT_SYMBOL(md_check_no_bitmap);
1099
1100
1101
1102
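/*
 * load_super for 0.90.0: read the legacy superblock from the end of
 * the device and compare it against refdev, if one is given. Returns
 * 1 if this device should become the reference (freshest non-spare),
 * 0 if it is usable but not the reference, and a negative errno on
 * any mismatch or read failure.
 */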
1103 static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1104 {
1105 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1106 mdp_super_t *sb;
1107 int ret;
1108 bool spare_disk = true;
1109
1110
1111
1112
1113
1114
1115
1116 rdev->sb_start = calc_dev_sboffset(rdev);
1117
1118 ret = read_disk_sb(rdev, MD_SB_BYTES);
1119 if (ret)
1120 return ret;
1121
1122 ret = -EINVAL;
1123
1124 bdevname(rdev->bdev, b);
1125 sb = page_address(rdev->sb_page);
1126
1127 if (sb->md_magic != MD_SB_MAGIC) {
1128 pr_warn("md: invalid raid superblock magic on %s\n", b);
1129 goto abort;
1130 }
1131
1132 if (sb->major_version != 0 ||
1133 sb->minor_version < 90 ||
1134 sb->minor_version > 91) {
1135 pr_warn("Bad version number %d.%d on %s\n",
1136 sb->major_version, sb->minor_version, b);
1137 goto abort;
1138 }
1139
1140 if (sb->raid_disks <= 0)
1141 goto abort;
1142
1143 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1144 pr_warn("md: invalid superblock checksum on %s\n", b);
1145 goto abort;
1146 }
1147
1148 rdev->preferred_minor = sb->md_minor;
1149 rdev->data_offset = 0;
1150 rdev->new_data_offset = 0;
1151 rdev->sb_size = MD_SB_BYTES;
1152 rdev->badblocks.shift = -1;
1153
1154 if (sb->level == LEVEL_MULTIPATH)
1155 rdev->desc_nr = -1;
1156 else
1157 rdev->desc_nr = sb->this_disk.number;
1158
1159
1160 if (sb->level == LEVEL_MULTIPATH ||
1161 (rdev->desc_nr >= 0 &&
1162 rdev->desc_nr < MD_SB_DISKS &&
1163 sb->disks[rdev->desc_nr].state &
1164 ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
1165 spare_disk = false;
1166
1167 if (!refdev) {
1168 if (!spare_disk)
1169 ret = 1;
1170 else
1171 ret = 0;
1172 } else {
1173 __u64 ev1, ev2;
1174 mdp_super_t *refsb = page_address(refdev->sb_page);
1175 if (!md_uuid_equal(refsb, sb)) {
1176 pr_warn("md: %s has different UUID to %s\n",
1177 b, bdevname(refdev->bdev,b2));
1178 goto abort;
1179 }
1180 if (!md_sb_equal(refsb, sb)) {
1181 pr_warn("md: %s has same UUID but different superblock to %s\n",
1182 b, bdevname(refdev->bdev, b2));
1183 goto abort;
1184 }
1185 ev1 = md_event(sb);
1186 ev2 = md_event(refsb);
1187
1188 if (!spare_disk && ev1 > ev2)
1189 ret = 1;
1190 else
1191 ret = 0;
1192 }
1193 rdev->sectors = rdev->sb_start;
1194
1195
1196
1197
1198 if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1199 rdev->sectors = (sector_t)(2ULL << 32) - 2;
1200
1201 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1202
1203 ret = -EINVAL;
1204
1205 abort:
1206 return ret;
1207 }
1208
1209
1210
1211
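/*
 * validate_super for 0.90.0: initialise the mddev from this
 * superblock when the array is first being assembled
 * (raid_disks == 0); otherwise check that the device's event count
 * is recent enough and set its role and flags.
 */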
1212 static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1213 {
1214 mdp_disk_t *desc;
1215 mdp_super_t *sb = page_address(rdev->sb_page);
1216 __u64 ev1 = md_event(sb);
1217
1218 rdev->raid_disk = -1;
1219 clear_bit(Faulty, &rdev->flags);
1220 clear_bit(In_sync, &rdev->flags);
1221 clear_bit(Bitmap_sync, &rdev->flags);
1222 clear_bit(WriteMostly, &rdev->flags);
1223
1224 if (mddev->raid_disks == 0) {
1225 mddev->major_version = 0;
1226 mddev->minor_version = sb->minor_version;
1227 mddev->patch_version = sb->patch_version;
1228 mddev->external = 0;
1229 mddev->chunk_sectors = sb->chunk_size >> 9;
1230 mddev->ctime = sb->ctime;
1231 mddev->utime = sb->utime;
1232 mddev->level = sb->level;
1233 mddev->clevel[0] = 0;
1234 mddev->layout = sb->layout;
1235 mddev->raid_disks = sb->raid_disks;
1236 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1237 mddev->events = ev1;
1238 mddev->bitmap_info.offset = 0;
1239 mddev->bitmap_info.space = 0;
1240
1241 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1242 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1243 mddev->reshape_backwards = 0;
1244
1245 if (mddev->minor_version >= 91) {
1246 mddev->reshape_position = sb->reshape_position;
1247 mddev->delta_disks = sb->delta_disks;
1248 mddev->new_level = sb->new_level;
1249 mddev->new_layout = sb->new_layout;
1250 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1251 if (mddev->delta_disks < 0)
1252 mddev->reshape_backwards = 1;
1253 } else {
1254 mddev->reshape_position = MaxSector;
1255 mddev->delta_disks = 0;
1256 mddev->new_level = mddev->level;
1257 mddev->new_layout = mddev->layout;
1258 mddev->new_chunk_sectors = mddev->chunk_sectors;
1259 }
1260 if (mddev->level == 0)
1261 mddev->layout = -1;
1262
1263 if (sb->state & (1<<MD_SB_CLEAN))
1264 mddev->recovery_cp = MaxSector;
1265 else {
1266 if (sb->events_hi == sb->cp_events_hi &&
1267 sb->events_lo == sb->cp_events_lo) {
1268 mddev->recovery_cp = sb->recovery_cp;
1269 } else
1270 mddev->recovery_cp = 0;
1271 }
1272
1273 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1274 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1275 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1276 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1277
1278 mddev->max_disks = MD_SB_DISKS;
1279
1280 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1281 mddev->bitmap_info.file == NULL) {
1282 mddev->bitmap_info.offset =
1283 mddev->bitmap_info.default_offset;
1284 mddev->bitmap_info.space =
1285 mddev->bitmap_info.default_space;
1286 }
1287
1288 } else if (mddev->pers == NULL) {
1289
1290
1291 ++ev1;
1292 if (sb->disks[rdev->desc_nr].state & (
1293 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1294 if (ev1 < mddev->events)
1295 return -EINVAL;
1296 } else if (mddev->bitmap) {
1297
1298
1299
1300 if (ev1 < mddev->bitmap->events_cleared)
1301 return 0;
1302 if (ev1 < mddev->events)
1303 set_bit(Bitmap_sync, &rdev->flags);
1304 } else {
1305 if (ev1 < mddev->events)
1306
1307 return 0;
1308 }
1309
1310 if (mddev->level != LEVEL_MULTIPATH) {
1311 desc = sb->disks + rdev->desc_nr;
1312
1313 if (desc->state & (1<<MD_DISK_FAULTY))
1314 set_bit(Faulty, &rdev->flags);
1315 else if (desc->state & (1<<MD_DISK_SYNC)
1316 ) {
1317 set_bit(In_sync, &rdev->flags);
1318 rdev->raid_disk = desc->raid_disk;
1319 rdev->saved_raid_disk = desc->raid_disk;
1320 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1321
1322
1323
1324 if (mddev->minor_version >= 91) {
1325 rdev->recovery_offset = 0;
1326 rdev->raid_disk = desc->raid_disk;
1327 }
1328 }
1329 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1330 set_bit(WriteMostly, &rdev->flags);
1331 if (desc->state & (1<<MD_DISK_FAILFAST))
1332 set_bit(FailFast, &rdev->flags);
1333 } else
1334 set_bit(In_sync, &rdev->flags);
1335 return 0;
1336 }
1337
1338
1339
1340
1341 static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1342 {
1343 mdp_super_t *sb;
1344 struct md_rdev *rdev2;
1345 int next_spare = mddev->raid_disks;
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357 int i;
1358 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1359
1360 rdev->sb_size = MD_SB_BYTES;
1361
1362 sb = page_address(rdev->sb_page);
1363
1364 memset(sb, 0, sizeof(*sb));
1365
1366 sb->md_magic = MD_SB_MAGIC;
1367 sb->major_version = mddev->major_version;
1368 sb->patch_version = mddev->patch_version;
1369 sb->gvalid_words = 0;
1370 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1371 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1372 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1373 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1374
1375 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1376 sb->level = mddev->level;
1377 sb->size = mddev->dev_sectors / 2;
1378 sb->raid_disks = mddev->raid_disks;
1379 sb->md_minor = mddev->md_minor;
1380 sb->not_persistent = 0;
1381 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1382 sb->state = 0;
1383 sb->events_hi = (mddev->events>>32);
1384 sb->events_lo = (u32)mddev->events;
1385
1386 if (mddev->reshape_position == MaxSector)
1387 sb->minor_version = 90;
1388 else {
1389 sb->minor_version = 91;
1390 sb->reshape_position = mddev->reshape_position;
1391 sb->new_level = mddev->new_level;
1392 sb->delta_disks = mddev->delta_disks;
1393 sb->new_layout = mddev->new_layout;
1394 sb->new_chunk = mddev->new_chunk_sectors << 9;
1395 }
1396 mddev->minor_version = sb->minor_version;
1397 if (mddev->in_sync)
1398 {
1399 sb->recovery_cp = mddev->recovery_cp;
1400 sb->cp_events_hi = (mddev->events>>32);
1401 sb->cp_events_lo = (u32)mddev->events;
1402 if (mddev->recovery_cp == MaxSector)
1403 sb->state = (1<< MD_SB_CLEAN);
1404 } else
1405 sb->recovery_cp = 0;
1406
1407 sb->layout = mddev->layout;
1408 sb->chunk_size = mddev->chunk_sectors << 9;
1409
1410 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1411 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1412
1413 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1414 rdev_for_each(rdev2, mddev) {
1415 mdp_disk_t *d;
1416 int desc_nr;
1417 int is_active = test_bit(In_sync, &rdev2->flags);
1418
1419 if (rdev2->raid_disk >= 0 &&
1420 sb->minor_version >= 91)
1421
1422
1423
1424
1425 is_active = 1;
1426 if (rdev2->raid_disk < 0 ||
1427 test_bit(Faulty, &rdev2->flags))
1428 is_active = 0;
1429 if (is_active)
1430 desc_nr = rdev2->raid_disk;
1431 else
1432 desc_nr = next_spare++;
1433 rdev2->desc_nr = desc_nr;
1434 d = &sb->disks[rdev2->desc_nr];
1435 nr_disks++;
1436 d->number = rdev2->desc_nr;
1437 d->major = MAJOR(rdev2->bdev->bd_dev);
1438 d->minor = MINOR(rdev2->bdev->bd_dev);
1439 if (is_active)
1440 d->raid_disk = rdev2->raid_disk;
1441 else
1442 d->raid_disk = rdev2->desc_nr;
1443 if (test_bit(Faulty, &rdev2->flags))
1444 d->state = (1<<MD_DISK_FAULTY);
1445 else if (is_active) {
1446 d->state = (1<<MD_DISK_ACTIVE);
1447 if (test_bit(In_sync, &rdev2->flags))
1448 d->state |= (1<<MD_DISK_SYNC);
1449 active++;
1450 working++;
1451 } else {
1452 d->state = 0;
1453 spare++;
1454 working++;
1455 }
1456 if (test_bit(WriteMostly, &rdev2->flags))
1457 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1458 if (test_bit(FailFast, &rdev2->flags))
1459 d->state |= (1<<MD_DISK_FAILFAST);
1460 }
1461
1462 for (i=0 ; i < mddev->raid_disks ; i++) {
1463 mdp_disk_t *d = &sb->disks[i];
1464 if (d->state == 0 && d->number == 0) {
1465 d->number = i;
1466 d->raid_disk = i;
1467 d->state = (1<<MD_DISK_REMOVED);
1468 d->state |= (1<<MD_DISK_FAULTY);
1469 failed++;
1470 }
1471 }
1472 sb->nr_disks = nr_disks;
1473 sb->active_disks = active;
1474 sb->working_disks = working;
1475 sb->failed_disks = failed;
1476 sb->spare_disks = spare;
1477
1478 sb->this_disk = sb->disks[rdev->desc_nr];
1479 sb->sb_csum = calc_sb_csum(sb);
1480 }
1481
1482
1483
1484
1485 static unsigned long long
1486 super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1487 {
1488 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1489 return 0;
1490 if (rdev->mddev->bitmap_info.offset)
1491 return 0;
1492 rdev->sb_start = calc_dev_sboffset(rdev);
1493 if (!num_sectors || num_sectors > rdev->sb_start)
1494 num_sectors = rdev->sb_start;
1495
1496
1497
1498 if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1499 num_sectors = (sector_t)(2ULL << 32) - 2;
1500 do {
1501 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1502 rdev->sb_page);
1503 } while (md_super_wait(rdev->mddev) < 0);
1504 return num_sectors;
1505 }
1506
1507 static int
1508 super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1509 {
1510
1511 return new_offset == 0;
1512 }
1513
1514
1515
1516
1517
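/*
 * Version-1 superblock checksum: sums the 256-byte header plus two
 * bytes per device role, then folds the 64-bit sum into 32 bits.
 */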
1518 static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
1519 {
1520 __le32 disk_csum;
1521 u32 csum;
1522 unsigned long long newcsum;
1523 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1524 __le32 *isuper = (__le32*)sb;
1525
1526 disk_csum = sb->sb_csum;
1527 sb->sb_csum = 0;
1528 newcsum = 0;
1529 for (; size >= 4; size -= 4)
1530 newcsum += le32_to_cpu(*isuper++);
1531
1532 if (size == 2)
1533 newcsum += le16_to_cpu(*(__le16*) isuper);
1534
1535 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1536 sb->sb_csum = disk_csum;
1537 return cpu_to_le32(csum);
1538 }
1539
1540 static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1541 {
1542 struct mdp_superblock_1 *sb;
1543 int ret;
1544 sector_t sb_start;
1545 sector_t sectors;
1546 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1547 int bmask;
1548 bool spare_disk = true;
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558 switch(minor_version) {
1559 case 0:
1560 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1561 sb_start -= 8*2;
1562 sb_start &= ~(sector_t)(4*2-1);
1563 break;
1564 case 1:
1565 sb_start = 0;
1566 break;
1567 case 2:
1568 sb_start = 8;
1569 break;
1570 default:
1571 return -EINVAL;
1572 }
1573 rdev->sb_start = sb_start;
1574
1575
1576
1577
1578 ret = read_disk_sb(rdev, 4096);
1579 if (ret) return ret;
1580
1581 sb = page_address(rdev->sb_page);
1582
1583 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1584 sb->major_version != cpu_to_le32(1) ||
1585 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1586 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1587 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1588 return -EINVAL;
1589
1590 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1591 pr_warn("md: invalid superblock checksum on %s\n",
1592 bdevname(rdev->bdev,b));
1593 return -EINVAL;
1594 }
1595 if (le64_to_cpu(sb->data_size) < 10) {
1596 pr_warn("md: data_size too small on %s\n",
1597 bdevname(rdev->bdev,b));
1598 return -EINVAL;
1599 }
1600 if (sb->pad0 ||
1601 sb->pad3[0] ||
1602 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1603
1604 return -EINVAL;
1605
1606 rdev->preferred_minor = 0xffff;
1607 rdev->data_offset = le64_to_cpu(sb->data_offset);
1608 rdev->new_data_offset = rdev->data_offset;
1609 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1610 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1611 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1612 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1613
1614 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1615 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1616 if (rdev->sb_size & bmask)
1617 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1618
1619 if (minor_version
1620 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1621 return -EINVAL;
1622 if (minor_version
1623 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1624 return -EINVAL;
1625
1626 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1627 rdev->desc_nr = -1;
1628 else
1629 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1630
1631 if (!rdev->bb_page) {
1632 rdev->bb_page = alloc_page(GFP_KERNEL);
1633 if (!rdev->bb_page)
1634 return -ENOMEM;
1635 }
1636 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1637 rdev->badblocks.count == 0) {
1638
1639
1640
1641 s32 offset;
1642 sector_t bb_sector;
1643 __le64 *bbp;
1644 int i;
1645 int sectors = le16_to_cpu(sb->bblog_size);
1646 if (sectors > (PAGE_SIZE / 512))
1647 return -EINVAL;
1648 offset = le32_to_cpu(sb->bblog_offset);
1649 if (offset == 0)
1650 return -EINVAL;
1651 bb_sector = (long long)offset;
1652 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1653 rdev->bb_page, REQ_OP_READ, 0, true))
1654 return -EIO;
1655 bbp = (__le64 *)page_address(rdev->bb_page);
1656 rdev->badblocks.shift = sb->bblog_shift;
1657 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1658 u64 bb = le64_to_cpu(*bbp);
1659 int count = bb & (0x3ff);
1660 u64 sector = bb >> 10;
1661 sector <<= sb->bblog_shift;
1662 count <<= sb->bblog_shift;
1663 if (bb + 1 == 0)
1664 break;
1665 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1666 return -EINVAL;
1667 }
1668 } else if (sb->bblog_offset != 0)
1669 rdev->badblocks.shift = 0;
1670
1671 if ((le32_to_cpu(sb->feature_map) &
1672 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
1673 rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
1674 rdev->ppl.size = le16_to_cpu(sb->ppl.size);
1675 rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
1676 }
1677
1678 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
1679 sb->level != 0)
1680 return -EINVAL;
1681
1682
1683 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
1684 (rdev->desc_nr >= 0 &&
1685 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1686 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1687 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
1688 spare_disk = false;
1689
1690 if (!refdev) {
1691 if (!spare_disk)
1692 ret = 1;
1693 else
1694 ret = 0;
1695 } else {
1696 __u64 ev1, ev2;
1697 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1698
1699 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1700 sb->level != refsb->level ||
1701 sb->layout != refsb->layout ||
1702 sb->chunksize != refsb->chunksize) {
1703 pr_warn("md: %s has strangely different superblock to %s\n",
1704 bdevname(rdev->bdev,b),
1705 bdevname(refdev->bdev,b2));
1706 return -EINVAL;
1707 }
1708 ev1 = le64_to_cpu(sb->events);
1709 ev2 = le64_to_cpu(refsb->events);
1710
1711 if (!spare_disk && ev1 > ev2)
1712 ret = 1;
1713 else
1714 ret = 0;
1715 }
1716 if (minor_version) {
1717 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1718 sectors -= rdev->data_offset;
1719 } else
1720 sectors = rdev->sb_start;
1721 if (sectors < le64_to_cpu(sb->data_size))
1722 return -EINVAL;
1723 rdev->sectors = le64_to_cpu(sb->data_size);
1724 return ret;
1725 }
1726
1727 static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1728 {
1729 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1730 __u64 ev1 = le64_to_cpu(sb->events);
1731
1732 rdev->raid_disk = -1;
1733 clear_bit(Faulty, &rdev->flags);
1734 clear_bit(In_sync, &rdev->flags);
1735 clear_bit(Bitmap_sync, &rdev->flags);
1736 clear_bit(WriteMostly, &rdev->flags);
1737
1738 if (mddev->raid_disks == 0) {
1739 mddev->major_version = 1;
1740 mddev->patch_version = 0;
1741 mddev->external = 0;
1742 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1743 mddev->ctime = le64_to_cpu(sb->ctime);
1744 mddev->utime = le64_to_cpu(sb->utime);
1745 mddev->level = le32_to_cpu(sb->level);
1746 mddev->clevel[0] = 0;
1747 mddev->layout = le32_to_cpu(sb->layout);
1748 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1749 mddev->dev_sectors = le64_to_cpu(sb->size);
1750 mddev->events = ev1;
1751 mddev->bitmap_info.offset = 0;
1752 mddev->bitmap_info.space = 0;
1753
1754
1755
1756 mddev->bitmap_info.default_offset = 1024 >> 9;
1757 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1758 mddev->reshape_backwards = 0;
1759
1760 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1761 memcpy(mddev->uuid, sb->set_uuid, 16);
1762
1763 mddev->max_disks = (4096-256)/2;
1764
1765 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1766 mddev->bitmap_info.file == NULL) {
1767 mddev->bitmap_info.offset =
1768 (__s32)le32_to_cpu(sb->bitmap_offset);
1769
1770
1771
1772
1773
1774 if (mddev->minor_version > 0)
1775 mddev->bitmap_info.space = 0;
1776 else if (mddev->bitmap_info.offset > 0)
1777 mddev->bitmap_info.space =
1778 8 - mddev->bitmap_info.offset;
1779 else
1780 mddev->bitmap_info.space =
1781 -mddev->bitmap_info.offset;
1782 }
1783
1784 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1785 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1786 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1787 mddev->new_level = le32_to_cpu(sb->new_level);
1788 mddev->new_layout = le32_to_cpu(sb->new_layout);
1789 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1790 if (mddev->delta_disks < 0 ||
1791 (mddev->delta_disks == 0 &&
1792 (le32_to_cpu(sb->feature_map)
1793 & MD_FEATURE_RESHAPE_BACKWARDS)))
1794 mddev->reshape_backwards = 1;
1795 } else {
1796 mddev->reshape_position = MaxSector;
1797 mddev->delta_disks = 0;
1798 mddev->new_level = mddev->level;
1799 mddev->new_layout = mddev->layout;
1800 mddev->new_chunk_sectors = mddev->chunk_sectors;
1801 }
1802
1803 if (mddev->level == 0 &&
1804 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
1805 mddev->layout = -1;
1806
1807 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1808 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1809
1810 if (le32_to_cpu(sb->feature_map) &
1811 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
1812 if (le32_to_cpu(sb->feature_map) &
1813 (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
1814 return -EINVAL;
1815 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
1816 (le32_to_cpu(sb->feature_map) &
1817 MD_FEATURE_MULTIPLE_PPLS))
1818 return -EINVAL;
1819 set_bit(MD_HAS_PPL, &mddev->flags);
1820 }
1821 } else if (mddev->pers == NULL) {
1822
1823
1824 ++ev1;
1825 if (rdev->desc_nr >= 0 &&
1826 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1827 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1828 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1829 if (ev1 < mddev->events)
1830 return -EINVAL;
1831 } else if (mddev->bitmap) {
1832
1833
1834
1835 if (ev1 < mddev->bitmap->events_cleared)
1836 return 0;
1837 if (ev1 < mddev->events)
1838 set_bit(Bitmap_sync, &rdev->flags);
1839 } else {
1840 if (ev1 < mddev->events)
1841
1842 return 0;
1843 }
1844 if (mddev->level != LEVEL_MULTIPATH) {
1845 int role;
1846 if (rdev->desc_nr < 0 ||
1847 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1848 role = MD_DISK_ROLE_SPARE;
1849 rdev->desc_nr = -1;
1850 } else
1851 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1852 switch(role) {
1853 case MD_DISK_ROLE_SPARE:
1854 break;
1855 case MD_DISK_ROLE_FAULTY:
1856 set_bit(Faulty, &rdev->flags);
1857 break;
1858 case MD_DISK_ROLE_JOURNAL:
1859 if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
1860
1861 pr_warn("md: journal device provided without journal feature, ignoring the device\n");
1862 return -EINVAL;
1863 }
1864 set_bit(Journal, &rdev->flags);
1865 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1866 rdev->raid_disk = 0;
1867 break;
1868 default:
1869 rdev->saved_raid_disk = role;
1870 if ((le32_to_cpu(sb->feature_map) &
1871 MD_FEATURE_RECOVERY_OFFSET)) {
1872 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1873 if (!(le32_to_cpu(sb->feature_map) &
1874 MD_FEATURE_RECOVERY_BITMAP))
1875 rdev->saved_raid_disk = -1;
1876 } else {
1877
1878
1879
1880
1881 if (!test_bit(MD_RECOVERY_FROZEN,
1882 &mddev->recovery))
1883 set_bit(In_sync, &rdev->flags);
1884 }
1885 rdev->raid_disk = role;
1886 break;
1887 }
1888 if (sb->devflags & WriteMostly1)
1889 set_bit(WriteMostly, &rdev->flags);
1890 if (sb->devflags & FailFast1)
1891 set_bit(FailFast, &rdev->flags);
1892 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1893 set_bit(Replacement, &rdev->flags);
1894 } else
1895 set_bit(In_sync, &rdev->flags);
1896
1897 return 0;
1898 }
1899
1900 static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1901 {
1902 struct mdp_superblock_1 *sb;
1903 struct md_rdev *rdev2;
1904 int max_dev, i;
1905
1906
1907 sb = page_address(rdev->sb_page);
1908
1909 sb->feature_map = 0;
1910 sb->pad0 = 0;
1911 sb->recovery_offset = cpu_to_le64(0);
1912 memset(sb->pad3, 0, sizeof(sb->pad3));
1913
1914 sb->utime = cpu_to_le64((__u64)mddev->utime);
1915 sb->events = cpu_to_le64(mddev->events);
1916 if (mddev->in_sync)
1917 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1918 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
1919 sb->resync_offset = cpu_to_le64(MaxSector);
1920 else
1921 sb->resync_offset = cpu_to_le64(0);
1922
1923 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1924
1925 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1926 sb->size = cpu_to_le64(mddev->dev_sectors);
1927 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1928 sb->level = cpu_to_le32(mddev->level);
1929 sb->layout = cpu_to_le32(mddev->layout);
1930 if (test_bit(FailFast, &rdev->flags))
1931 sb->devflags |= FailFast1;
1932 else
1933 sb->devflags &= ~FailFast1;
1934
1935 if (test_bit(WriteMostly, &rdev->flags))
1936 sb->devflags |= WriteMostly1;
1937 else
1938 sb->devflags &= ~WriteMostly1;
1939 sb->data_offset = cpu_to_le64(rdev->data_offset);
1940 sb->data_size = cpu_to_le64(rdev->sectors);
1941
1942 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1943 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1944 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1945 }
1946
1947 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
1948 !test_bit(In_sync, &rdev->flags)) {
1949 sb->feature_map |=
1950 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1951 sb->recovery_offset =
1952 cpu_to_le64(rdev->recovery_offset);
1953 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
1954 sb->feature_map |=
1955 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
1956 }
1957
1958 if (test_bit(Journal, &rdev->flags))
1959 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
1960 if (test_bit(Replacement, &rdev->flags))
1961 sb->feature_map |=
1962 cpu_to_le32(MD_FEATURE_REPLACEMENT);
1963
1964 if (mddev->reshape_position != MaxSector) {
1965 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1966 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1967 sb->new_layout = cpu_to_le32(mddev->new_layout);
1968 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1969 sb->new_level = cpu_to_le32(mddev->new_level);
1970 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1971 if (mddev->delta_disks == 0 &&
1972 mddev->reshape_backwards)
1973 sb->feature_map
1974 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
1975 if (rdev->new_data_offset != rdev->data_offset) {
1976 sb->feature_map
1977 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
1978 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
1979 - rdev->data_offset));
1980 }
1981 }
1982
1983 if (mddev_is_clustered(mddev))
1984 sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);
1985
1986 if (rdev->badblocks.count == 0)
1987 ;
1988 else if (sb->bblog_offset == 0)
1989
1990 md_error(mddev, rdev);
1991 else {
1992 struct badblocks *bb = &rdev->badblocks;
1993 __le64 *bbp = (__le64 *)page_address(rdev->bb_page);
1994 u64 *p = bb->page;
1995 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
1996 if (bb->changed) {
1997 unsigned seq;
1998
1999 retry:
2000 seq = read_seqbegin(&bb->lock);
2001
2002 memset(bbp, 0xff, PAGE_SIZE);
2003
2004 for (i = 0 ; i < bb->count ; i++) {
2005 u64 internal_bb = p[i];
2006 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
2007 | BB_LEN(internal_bb));
2008 bbp[i] = cpu_to_le64(store_bb);
2009 }
2010 bb->changed = 0;
2011 if (read_seqretry(&bb->lock, seq))
2012 goto retry;
2013
2014 bb->sector = (rdev->sb_start +
2015 (int)le32_to_cpu(sb->bblog_offset));
2016 bb->size = le16_to_cpu(sb->bblog_size);
2017 }
2018 }
2019
2020 max_dev = 0;
2021 rdev_for_each(rdev2, mddev)
2022 if (rdev2->desc_nr+1 > max_dev)
2023 max_dev = rdev2->desc_nr+1;
2024
2025 if (max_dev > le32_to_cpu(sb->max_dev)) {
2026 int bmask;
2027 sb->max_dev = cpu_to_le32(max_dev);
2028 rdev->sb_size = max_dev * 2 + 256;
2029 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
2030 if (rdev->sb_size & bmask)
2031 rdev->sb_size = (rdev->sb_size | bmask) + 1;
2032 } else
2033 max_dev = le32_to_cpu(sb->max_dev);
2034
2035 for (i=0; i<max_dev;i++)
2036 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2037
2038 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
2039 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
2040
2041 if (test_bit(MD_HAS_PPL, &mddev->flags)) {
2042 if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
2043 sb->feature_map |=
2044 cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
2045 else
2046 sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
2047 sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
2048 sb->ppl.size = cpu_to_le16(rdev->ppl.size);
2049 }
2050
2051 rdev_for_each(rdev2, mddev) {
2052 i = rdev2->desc_nr;
2053 if (test_bit(Faulty, &rdev2->flags))
2054 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
2055 else if (test_bit(In_sync, &rdev2->flags))
2056 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2057 else if (test_bit(Journal, &rdev2->flags))
2058 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
2059 else if (rdev2->raid_disk >= 0)
2060 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2061 else
2062 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2063 }
2064
2065 sb->sb_csum = calc_sb_1_csum(sb);
2066 }
2067
2068 static unsigned long long
2069 super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
2070 {
2071 struct mdp_superblock_1 *sb;
2072 sector_t max_sectors;
2073 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
2074 return 0;
2075 if (rdev->data_offset != rdev->new_data_offset)
2076 return 0;
2077 if (rdev->sb_start < rdev->data_offset) {
2078
2079 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
2080 max_sectors -= rdev->data_offset;
2081 if (!num_sectors || num_sectors > max_sectors)
2082 num_sectors = max_sectors;
2083 } else if (rdev->mddev->bitmap_info.offset) {
2084
2085 return 0;
2086 } else {
2087
2088 sector_t sb_start;
2089 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
2090 sb_start &= ~(sector_t)(4*2 - 1);
2091 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
2092 if (!num_sectors || num_sectors > max_sectors)
2093 num_sectors = max_sectors;
2094 rdev->sb_start = sb_start;
2095 }
2096 sb = page_address(rdev->sb_page);
2097 sb->data_size = cpu_to_le64(num_sectors);
2098 sb->super_offset = cpu_to_le64(rdev->sb_start);
2099 sb->sb_csum = calc_sb_1_csum(sb);
2100 do {
2101 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
2102 rdev->sb_page);
2103 } while (md_super_wait(rdev->mddev) < 0);
2104 return num_sectors;
2105
2106 }
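In the final branch above, used when the superblock sits after the data, the new superblock position is recomputed as the device size in sectors minus 16, rounded down to an 8-sector boundary (that is what the 8*2 and 4*2 - 1 constants express). A quick example with a hypothetical device size:

	/*
	 * A component device of 1000005 sectors gives
	 *     sb_start = (1000005 - 16) & ~7 == 999984
	 * and max_sectors then grows by however far the superblock moved
	 * (sb_start - rdev->sb_start).
	 */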
2107
2108 static int
2109 super_1_allow_new_offset(struct md_rdev *rdev,
2110 unsigned long long new_offset)
2111 {
2112
2113 struct bitmap *bitmap;
2114 if (new_offset >= rdev->data_offset)
2115 return 1;
2116
2117
2118
2119 if (rdev->mddev->minor_version == 0)
2120 return 1;
2121
2122
2123
2124
2125
2126
2127
2128 if (rdev->sb_start + (32+4)*2 > new_offset)
2129 return 0;
2130 bitmap = rdev->mddev->bitmap;
2131 if (bitmap && !rdev->mddev->bitmap_info.file &&
2132 rdev->sb_start + rdev->mddev->bitmap_info.offset +
2133 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
2134 return 0;
2135 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
2136 return 0;
2137
2138 return 1;
2139 }
2140
2141 static struct super_type super_types[] = {
2142 [0] = {
2143 .name = "0.90.0",
2144 .owner = THIS_MODULE,
2145 .load_super = super_90_load,
2146 .validate_super = super_90_validate,
2147 .sync_super = super_90_sync,
2148 .rdev_size_change = super_90_rdev_size_change,
2149 .allow_new_offset = super_90_allow_new_offset,
2150 },
2151 [1] = {
2152 .name = "md-1",
2153 .owner = THIS_MODULE,
2154 .load_super = super_1_load,
2155 .validate_super = super_1_validate,
2156 .sync_super = super_1_sync,
2157 .rdev_size_change = super_1_rdev_size_change,
2158 .allow_new_offset = super_1_allow_new_offset,
2159 },
2160 };
2161
2162 static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
2163 {
2164 if (mddev->sync_super) {
2165 mddev->sync_super(mddev, rdev);
2166 return;
2167 }
2168
2169 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
2170
2171 super_types[mddev->major_version].sync_super(mddev, rdev);
2172 }
2173
2174 static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
2175 {
2176 struct md_rdev *rdev, *rdev2;
2177
2178 rcu_read_lock();
2179 rdev_for_each_rcu(rdev, mddev1) {
2180 if (test_bit(Faulty, &rdev->flags) ||
2181 test_bit(Journal, &rdev->flags) ||
2182 rdev->raid_disk == -1)
2183 continue;
2184 rdev_for_each_rcu(rdev2, mddev2) {
2185 if (test_bit(Faulty, &rdev2->flags) ||
2186 test_bit(Journal, &rdev2->flags) ||
2187 rdev2->raid_disk == -1)
2188 continue;
2189 if (rdev->bdev->bd_contains ==
2190 rdev2->bdev->bd_contains) {
2191 rcu_read_unlock();
2192 return 1;
2193 }
2194 }
2195 }
2196 rcu_read_unlock();
2197 return 0;
2198 }
2199
2200 static LIST_HEAD(pending_raid_disks);
2201
2202
2203
2204
2205
2206
2207
2208
2209 int md_integrity_register(struct mddev *mddev)
2210 {
2211 struct md_rdev *rdev, *reference = NULL;
2212
2213 if (list_empty(&mddev->disks))
2214 return 0;
2215 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2216 return 0;
2217 rdev_for_each(rdev, mddev) {
2218
2219 if (test_bit(Faulty, &rdev->flags))
2220 continue;
2221 if (rdev->raid_disk < 0)
2222 continue;
2223 if (!reference) {
2224
2225 reference = rdev;
2226 continue;
2227 }
2228
2229 if (blk_integrity_compare(reference->bdev->bd_disk,
2230 rdev->bdev->bd_disk) < 0)
2231 return -EINVAL;
2232 }
2233 if (!reference || !bdev_get_integrity(reference->bdev))
2234 return 0;
2235
2236
2237
2238
2239 blk_integrity_register(mddev->gendisk,
2240 bdev_get_integrity(reference->bdev));
2241
2242 pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
2243 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
2244 pr_err("md: failed to create integrity pool for %s\n",
2245 mdname(mddev));
2246 return -EINVAL;
2247 }
2248 return 0;
2249 }
2250 EXPORT_SYMBOL(md_integrity_register);
2251
2252
2253
2254
2255
2256 int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2257 {
2258 struct blk_integrity *bi_mddev;
2259 char name[BDEVNAME_SIZE];
2260
2261 if (!mddev->gendisk)
2262 return 0;
2263
2264 bi_mddev = blk_get_integrity(mddev->gendisk);
2265
2266 if (!bi_mddev)
2267 return 0;
2268
2269 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2270 pr_err("%s: incompatible integrity profile for %s\n",
2271 mdname(mddev), bdevname(rdev->bdev, name));
2272 return -ENXIO;
2273 }
2274
2275 return 0;
2276 }
2277 EXPORT_SYMBOL(md_integrity_add_rdev);
2278
2279 static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2280 {
2281 char b[BDEVNAME_SIZE];
2282 struct kobject *ko;
2283 int err;
2284
2285
2286 if (find_rdev(mddev, rdev->bdev->bd_dev))
2287 return -EEXIST;
2288
2289 if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
2290 mddev->pers)
2291 return -EROFS;
2292
2293
2294 if (!test_bit(Journal, &rdev->flags) &&
2295 rdev->sectors &&
2296 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2297 if (mddev->pers) {
2298
2299
2300
2301
2302 if (mddev->level > 0)
2303 return -ENOSPC;
2304 } else
2305 mddev->dev_sectors = rdev->sectors;
2306 }
2307
2308
2309
2310
2311
2312 rcu_read_lock();
2313 if (rdev->desc_nr < 0) {
2314 int choice = 0;
2315 if (mddev->pers)
2316 choice = mddev->raid_disks;
2317 while (md_find_rdev_nr_rcu(mddev, choice))
2318 choice++;
2319 rdev->desc_nr = choice;
2320 } else {
2321 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2322 rcu_read_unlock();
2323 return -EBUSY;
2324 }
2325 }
2326 rcu_read_unlock();
2327 if (!test_bit(Journal, &rdev->flags) &&
2328 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2329 pr_warn("md: %s: array is limited to %d devices\n",
2330 mdname(mddev), mddev->max_disks);
2331 return -EBUSY;
2332 }
2333 bdevname(rdev->bdev,b);
2334 strreplace(b, '/', '!');
2335
2336 rdev->mddev = mddev;
2337 pr_debug("md: bind<%s>\n", b);
2338
2339 if (mddev->raid_disks)
2340 mddev_create_wb_pool(mddev, rdev, false);
2341
2342 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2343 goto fail;
2344
2345 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2346 if (sysfs_create_link(&rdev->kobj, ko, "block"))
2347 ;
2348 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2349
2350 list_add_rcu(&rdev->same_set, &mddev->disks);
2351 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2352
2353
2354 mddev->recovery_disabled++;
2355
2356 return 0;
2357
2358 fail:
2359 pr_warn("md: failed to register dev-%s for %s\n",
2360 b, mdname(mddev));
2361 return err;
2362 }
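When a device arrives without a descriptor number, the search above hands out the first free desc_nr, and for a running array it starts at raid_disks so that hot-added spares do not claim slots reserved for active members. With made-up numbers:

	/*
	 * raid_disks == 4, desc_nr 4 already taken    ->  the new rdev gets desc_nr 5
	 * array not yet started (mddev->pers == NULL) ->  the search starts at 0
	 */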
2363
2364 static void md_delayed_delete(struct work_struct *ws)
2365 {
2366 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2367 kobject_del(&rdev->kobj);
2368 kobject_put(&rdev->kobj);
2369 }
2370
2371 static void unbind_rdev_from_array(struct md_rdev *rdev)
2372 {
2373 char b[BDEVNAME_SIZE];
2374
2375 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2376 list_del_rcu(&rdev->same_set);
2377 pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
2378 mddev_destroy_wb_pool(rdev->mddev, rdev);
2379 rdev->mddev = NULL;
2380 sysfs_remove_link(&rdev->kobj, "block");
2381 sysfs_put(rdev->sysfs_state);
2382 rdev->sysfs_state = NULL;
2383 rdev->badblocks.count = 0;
2384
2385
2386
2387
2388 synchronize_rcu();
2389 INIT_WORK(&rdev->del_work, md_delayed_delete);
2390 kobject_get(&rdev->kobj);
2391 queue_work(md_misc_wq, &rdev->del_work);
2392 }
2393
2394
2395
2396
2397
2398
2399 static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2400 {
2401 int err = 0;
2402 struct block_device *bdev;
2403 char b[BDEVNAME_SIZE];
2404
2405 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2406 shared ? (struct md_rdev *)lock_rdev : rdev);
2407 if (IS_ERR(bdev)) {
2408 pr_warn("md: could not open %s.\n", __bdevname(dev, b));
2409 return PTR_ERR(bdev);
2410 }
2411 rdev->bdev = bdev;
2412 return err;
2413 }
2414
2415 static void unlock_rdev(struct md_rdev *rdev)
2416 {
2417 struct block_device *bdev = rdev->bdev;
2418 rdev->bdev = NULL;
2419 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2420 }
2421
2422 void md_autodetect_dev(dev_t dev);
2423
2424 static void export_rdev(struct md_rdev *rdev)
2425 {
2426 char b[BDEVNAME_SIZE];
2427
2428 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2429 md_rdev_clear(rdev);
2430 #ifndef MODULE
2431 if (test_bit(AutoDetected, &rdev->flags))
2432 md_autodetect_dev(rdev->bdev->bd_dev);
2433 #endif
2434 unlock_rdev(rdev);
2435 kobject_put(&rdev->kobj);
2436 }
2437
2438 void md_kick_rdev_from_array(struct md_rdev *rdev)
2439 {
2440 unbind_rdev_from_array(rdev);
2441 export_rdev(rdev);
2442 }
2443 EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2444
2445 static void export_array(struct mddev *mddev)
2446 {
2447 struct md_rdev *rdev;
2448
2449 while (!list_empty(&mddev->disks)) {
2450 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2451 same_set);
2452 md_kick_rdev_from_array(rdev);
2453 }
2454 mddev->raid_disks = 0;
2455 mddev->major_version = 0;
2456 }
2457
2458 static bool set_in_sync(struct mddev *mddev)
2459 {
2460 lockdep_assert_held(&mddev->lock);
2461 if (!mddev->in_sync) {
2462 mddev->sync_checkers++;
2463 spin_unlock(&mddev->lock);
2464 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2465 spin_lock(&mddev->lock);
2466 if (!mddev->in_sync &&
2467 percpu_ref_is_zero(&mddev->writes_pending)) {
2468 mddev->in_sync = 1;
2469
2470
2471
2472
2473 smp_mb();
2474 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2475 sysfs_notify_dirent_safe(mddev->sysfs_state);
2476 }
2477 if (--mddev->sync_checkers == 0)
2478 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2479 }
2480 if (mddev->safemode == 1)
2481 mddev->safemode = 0;
2482 return mddev->in_sync;
2483 }
2484
2485 static void sync_sbs(struct mddev *mddev, int nospares)
2486 {
2487
2488
2489
2490
2491
2492
2493 struct md_rdev *rdev;
2494 rdev_for_each(rdev, mddev) {
2495 if (rdev->sb_events == mddev->events ||
2496 (nospares &&
2497 rdev->raid_disk < 0 &&
2498 rdev->sb_events+1 == mddev->events)) {
2499
2500 rdev->sb_loaded = 2;
2501 } else {
2502 sync_super(mddev, rdev);
2503 rdev->sb_loaded = 1;
2504 }
2505 }
2506 }
2507
2508 static bool does_sb_need_changing(struct mddev *mddev)
2509 {
2510 struct md_rdev *rdev;
2511 struct mdp_superblock_1 *sb;
2512 int role;
2513
2514
2515 rdev_for_each(rdev, mddev)
2516 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2517 break;
2518
2519
2520 if (!rdev)
2521 return false;
2522
2523 sb = page_address(rdev->sb_page);
2524
2525 rdev_for_each(rdev, mddev) {
2526 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2527
2528 if (role == 0xffff && rdev->raid_disk >=0 &&
2529 !test_bit(Faulty, &rdev->flags))
2530 return true;
2531
2532 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2533 return true;
2534 }
2535
2536
2537 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2538 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2539 (mddev->layout != le32_to_cpu(sb->layout)) ||
2540 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2541 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2542 return true;
2543
2544 return false;
2545 }
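The role checks above compare against the reserved slot values of the v1.x superblock format (MD_DISK_ROLE_* in md_p.h): 0xffff marks a spare, 0xfffe a faulty device and 0xfffd a journal device, so a value below 0xfffd is a real data slot. For illustration:

	/*
	 * role == 0x0003  ->  data slot 3 (role < 0xfffd)
	 * role == 0xffff  ->  MD_DISK_ROLE_SPARE
	 * role == 0xfffe  ->  MD_DISK_ROLE_FAULTY
	 * role == 0xfffd  ->  MD_DISK_ROLE_JOURNAL
	 */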
2546
2547 void md_update_sb(struct mddev *mddev, int force_change)
2548 {
2549 struct md_rdev *rdev;
2550 int sync_req;
2551 int nospares = 0;
2552 int any_badblocks_changed = 0;
2553 int ret = -1;
2554
2555 if (mddev->ro) {
2556 if (force_change)
2557 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2558 return;
2559 }
2560
2561 repeat:
2562 if (mddev_is_clustered(mddev)) {
2563 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2564 force_change = 1;
2565 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2566 nospares = 1;
2567 ret = md_cluster_ops->metadata_update_start(mddev);
2568
2569 if (!does_sb_need_changing(mddev)) {
2570 if (ret == 0)
2571 md_cluster_ops->metadata_update_cancel(mddev);
2572 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2573 BIT(MD_SB_CHANGE_DEVS) |
2574 BIT(MD_SB_CHANGE_CLEAN));
2575 return;
2576 }
2577 }
2578
2579
2580
2581
2582
2583
2584
2585 rdev_for_each(rdev, mddev) {
2586 if (rdev->raid_disk >= 0 &&
2587 mddev->delta_disks >= 0 &&
2588 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2589 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2590 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2591 !test_bit(Journal, &rdev->flags) &&
2592 !test_bit(In_sync, &rdev->flags) &&
2593 mddev->curr_resync_completed > rdev->recovery_offset)
2594 rdev->recovery_offset = mddev->curr_resync_completed;
2595
2596 }
2597 if (!mddev->persistent) {
2598 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2599 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2600 if (!mddev->external) {
2601 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2602 rdev_for_each(rdev, mddev) {
2603 if (rdev->badblocks.changed) {
2604 rdev->badblocks.changed = 0;
2605 ack_all_badblocks(&rdev->badblocks);
2606 md_error(mddev, rdev);
2607 }
2608 clear_bit(Blocked, &rdev->flags);
2609 clear_bit(BlockedBadBlocks, &rdev->flags);
2610 wake_up(&rdev->blocked_wait);
2611 }
2612 }
2613 wake_up(&mddev->sb_wait);
2614 return;
2615 }
2616
2617 spin_lock(&mddev->lock);
2618
2619 mddev->utime = ktime_get_real_seconds();
2620
2621 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2622 force_change = 1;
2623 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2624
2625
2626
2627
2628 nospares = 1;
2629 if (force_change)
2630 nospares = 0;
2631 if (mddev->degraded)
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641 nospares = 0;
2642
2643 sync_req = mddev->in_sync;
2644
2645
2646
2647 if (nospares
2648 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2649 && mddev->can_decrease_events
2650 && mddev->events != 1) {
2651 mddev->events--;
2652 mddev->can_decrease_events = 0;
2653 } else {
2654
2655 mddev->events ++;
2656 mddev->can_decrease_events = nospares;
2657 }
2658
2659
2660
2661
2662
2663
2664 WARN_ON(mddev->events == 0);
2665
2666 rdev_for_each(rdev, mddev) {
2667 if (rdev->badblocks.changed)
2668 any_badblocks_changed++;
2669 if (test_bit(Faulty, &rdev->flags))
2670 set_bit(FaultRecorded, &rdev->flags);
2671 }
2672
2673 sync_sbs(mddev, nospares);
2674 spin_unlock(&mddev->lock);
2675
2676 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2677 mdname(mddev), mddev->in_sync);
2678
2679 if (mddev->queue)
2680 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2681 rewrite:
2682 md_bitmap_update_sb(mddev->bitmap);
2683 rdev_for_each(rdev, mddev) {
2684 char b[BDEVNAME_SIZE];
2685
2686 if (rdev->sb_loaded != 1)
2687 continue;
2688
2689 if (!test_bit(Faulty, &rdev->flags)) {
2690 md_super_write(mddev,rdev,
2691 rdev->sb_start, rdev->sb_size,
2692 rdev->sb_page);
2693 pr_debug("md: (write) %s's sb offset: %llu\n",
2694 bdevname(rdev->bdev, b),
2695 (unsigned long long)rdev->sb_start);
2696 rdev->sb_events = mddev->events;
2697 if (rdev->badblocks.size) {
2698 md_super_write(mddev, rdev,
2699 rdev->badblocks.sector,
2700 rdev->badblocks.size << 9,
2701 rdev->bb_page);
2702 rdev->badblocks.size = 0;
2703 }
2704
2705 } else
2706 pr_debug("md: %s (skipping faulty)\n",
2707 bdevname(rdev->bdev, b));
2708
2709 if (mddev->level == LEVEL_MULTIPATH)
2710
2711 break;
2712 }
2713 if (md_super_wait(mddev) < 0)
2714 goto rewrite;
2715
2716
2717 if (mddev_is_clustered(mddev) && ret == 0)
2718 md_cluster_ops->metadata_update_finish(mddev);
2719
2720 if (mddev->in_sync != sync_req ||
2721 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2722 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
2723
2724 goto repeat;
2725 wake_up(&mddev->sb_wait);
2726 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2727 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2728
2729 rdev_for_each(rdev, mddev) {
2730 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2731 clear_bit(Blocked, &rdev->flags);
2732
2733 if (any_badblocks_changed)
2734 ack_all_badblocks(&rdev->badblocks);
2735 clear_bit(BlockedBadBlocks, &rdev->flags);
2736 wake_up(&rdev->blocked_wait);
2737 }
2738 }
2739 EXPORT_SYMBOL(md_update_sb);
2740
2741 static int add_bound_rdev(struct md_rdev *rdev)
2742 {
2743 struct mddev *mddev = rdev->mddev;
2744 int err = 0;
2745 bool add_journal = test_bit(Journal, &rdev->flags);
2746
2747 if (!mddev->pers->hot_remove_disk || add_journal) {
2748
2749
2750
2751
2752 super_types[mddev->major_version].
2753 validate_super(mddev, rdev);
2754 if (add_journal)
2755 mddev_suspend(mddev);
2756 err = mddev->pers->hot_add_disk(mddev, rdev);
2757 if (add_journal)
2758 mddev_resume(mddev);
2759 if (err) {
2760 md_kick_rdev_from_array(rdev);
2761 return err;
2762 }
2763 }
2764 sysfs_notify_dirent_safe(rdev->sysfs_state);
2765
2766 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2767 if (mddev->degraded)
2768 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2769 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2770 md_new_event(mddev);
2771 md_wakeup_thread(mddev->thread);
2772 return 0;
2773 }
2774
2775
2776
2777
2778 static int cmd_match(const char *cmd, const char *str)
2779 {
2780
2781
2782
2783
2784 while (*cmd && *str && *cmd == *str) {
2785 cmd++;
2786 str++;
2787 }
2788 if (*cmd == '\n')
2789 cmd++;
2790 if (*str || *cmd)
2791 return 0;
2792 return 1;
2793 }
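Every sysfs store handler below routes its input through cmd_match(), which tolerates one trailing newline but otherwise insists on an exact, full-length match. A minimal user-space sketch of the same rule (the demo harness and its names are illustrative, not part of md.c):

	#include <assert.h>

	static int cmd_match_demo(const char *cmd, const char *str)
	{
		while (*cmd && *str && *cmd == *str) {
			cmd++;
			str++;
		}
		if (*cmd == '\n')
			cmd++;
		return !(*str || *cmd);
	}

	int main(void)
	{
		assert(cmd_match_demo("faulty\n", "faulty"));	/* sysfs writes usually end in '\n' */
		assert(cmd_match_demo("faulty", "faulty"));	/* a bare word matches too */
		assert(!cmd_match_demo("fault\n", "faulty"));	/* prefixes are rejected */
		assert(!cmd_match_demo("faultyy", "faulty"));	/* so is trailing garbage */
		return 0;
	}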
2794
2795 struct rdev_sysfs_entry {
2796 struct attribute attr;
2797 ssize_t (*show)(struct md_rdev *, char *);
2798 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2799 };
2800
2801 static ssize_t
2802 state_show(struct md_rdev *rdev, char *page)
2803 {
2804 char *sep = ",";
2805 size_t len = 0;
2806 unsigned long flags = READ_ONCE(rdev->flags);
2807
2808 if (test_bit(Faulty, &flags) ||
2809 (!test_bit(ExternalBbl, &flags) &&
2810 rdev->badblocks.unacked_exist))
2811 len += sprintf(page+len, "faulty%s", sep);
2812 if (test_bit(In_sync, &flags))
2813 len += sprintf(page+len, "in_sync%s", sep);
2814 if (test_bit(Journal, &flags))
2815 len += sprintf(page+len, "journal%s", sep);
2816 if (test_bit(WriteMostly, &flags))
2817 len += sprintf(page+len, "write_mostly%s", sep);
2818 if (test_bit(Blocked, &flags) ||
2819 (rdev->badblocks.unacked_exist
2820 && !test_bit(Faulty, &flags)))
2821 len += sprintf(page+len, "blocked%s", sep);
2822 if (!test_bit(Faulty, &flags) &&
2823 !test_bit(Journal, &flags) &&
2824 !test_bit(In_sync, &flags))
2825 len += sprintf(page+len, "spare%s", sep);
2826 if (test_bit(WriteErrorSeen, &flags))
2827 len += sprintf(page+len, "write_error%s", sep);
2828 if (test_bit(WantReplacement, &flags))
2829 len += sprintf(page+len, "want_replacement%s", sep);
2830 if (test_bit(Replacement, &flags))
2831 len += sprintf(page+len, "replacement%s", sep);
2832 if (test_bit(ExternalBbl, &flags))
2833 len += sprintf(page+len, "external_bbl%s", sep);
2834 if (test_bit(FailFast, &flags))
2835 len += sprintf(page+len, "failfast%s", sep);
2836
2837 if (len)
2838 len -= strlen(sep);
2839
2840 return len+sprintf(page+len, "\n");
2841 }
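Reading the state attribute therefore yields a comma-separated flag list with the final separator trimmed before the newline is appended. Illustrative output only:

	/*
	 * A member that is both In_sync and WriteMostly reads back as
	 *     "in_sync,write_mostly\n"
	 */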
2842
2843 static ssize_t
2844 state_store(struct md_rdev *rdev, const char *buf, size_t len)
2845 {
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860 int err = -EINVAL;
2861 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2862 md_error(rdev->mddev, rdev);
2863 if (test_bit(Faulty, &rdev->flags))
2864 err = 0;
2865 else
2866 err = -EBUSY;
2867 } else if (cmd_match(buf, "remove")) {
2868 if (rdev->mddev->pers) {
2869 clear_bit(Blocked, &rdev->flags);
2870 remove_and_add_spares(rdev->mddev, rdev);
2871 }
2872 if (rdev->raid_disk >= 0)
2873 err = -EBUSY;
2874 else {
2875 struct mddev *mddev = rdev->mddev;
2876 err = 0;
2877 if (mddev_is_clustered(mddev))
2878 err = md_cluster_ops->remove_disk(mddev, rdev);
2879
2880 if (err == 0) {
2881 md_kick_rdev_from_array(rdev);
2882 if (mddev->pers) {
2883 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2884 md_wakeup_thread(mddev->thread);
2885 }
2886 md_new_event(mddev);
2887 }
2888 }
2889 } else if (cmd_match(buf, "writemostly")) {
2890 set_bit(WriteMostly, &rdev->flags);
2891 mddev_create_wb_pool(rdev->mddev, rdev, false);
2892 err = 0;
2893 } else if (cmd_match(buf, "-writemostly")) {
2894 mddev_destroy_wb_pool(rdev->mddev, rdev);
2895 clear_bit(WriteMostly, &rdev->flags);
2896 err = 0;
2897 } else if (cmd_match(buf, "blocked")) {
2898 set_bit(Blocked, &rdev->flags);
2899 err = 0;
2900 } else if (cmd_match(buf, "-blocked")) {
2901 if (!test_bit(Faulty, &rdev->flags) &&
2902 !test_bit(ExternalBbl, &rdev->flags) &&
2903 rdev->badblocks.unacked_exist) {
2904
2905
2906
2907 md_error(rdev->mddev, rdev);
2908 }
2909 clear_bit(Blocked, &rdev->flags);
2910 clear_bit(BlockedBadBlocks, &rdev->flags);
2911 wake_up(&rdev->blocked_wait);
2912 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2913 md_wakeup_thread(rdev->mddev->thread);
2914
2915 err = 0;
2916 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2917 set_bit(In_sync, &rdev->flags);
2918 err = 0;
2919 } else if (cmd_match(buf, "failfast")) {
2920 set_bit(FailFast, &rdev->flags);
2921 err = 0;
2922 } else if (cmd_match(buf, "-failfast")) {
2923 clear_bit(FailFast, &rdev->flags);
2924 err = 0;
2925 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
2926 !test_bit(Journal, &rdev->flags)) {
2927 if (rdev->mddev->pers == NULL) {
2928 clear_bit(In_sync, &rdev->flags);
2929 rdev->saved_raid_disk = rdev->raid_disk;
2930 rdev->raid_disk = -1;
2931 err = 0;
2932 }
2933 } else if (cmd_match(buf, "write_error")) {
2934 set_bit(WriteErrorSeen, &rdev->flags);
2935 err = 0;
2936 } else if (cmd_match(buf, "-write_error")) {
2937 clear_bit(WriteErrorSeen, &rdev->flags);
2938 err = 0;
2939 } else if (cmd_match(buf, "want_replacement")) {
2940
2941
2942
2943
2944 if (rdev->raid_disk >= 0 &&
2945 !test_bit(Journal, &rdev->flags) &&
2946 !test_bit(Replacement, &rdev->flags))
2947 set_bit(WantReplacement, &rdev->flags);
2948 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2949 md_wakeup_thread(rdev->mddev->thread);
2950 err = 0;
2951 } else if (cmd_match(buf, "-want_replacement")) {
2952
2953
2954
2955 err = 0;
2956 clear_bit(WantReplacement, &rdev->flags);
2957 } else if (cmd_match(buf, "replacement")) {
2958
2959
2960
2961
2962 if (rdev->mddev->pers)
2963 err = -EBUSY;
2964 else {
2965 set_bit(Replacement, &rdev->flags);
2966 err = 0;
2967 }
2968 } else if (cmd_match(buf, "-replacement")) {
2969
2970 if (rdev->mddev->pers)
2971 err = -EBUSY;
2972 else {
2973 clear_bit(Replacement, &rdev->flags);
2974 err = 0;
2975 }
2976 } else if (cmd_match(buf, "re-add")) {
2977 if (!rdev->mddev->pers)
2978 err = -EINVAL;
2979 else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
2980 rdev->saved_raid_disk >= 0) {
2981
2982
2983
2984
2985
2986
2987 if (!mddev_is_clustered(rdev->mddev) ||
2988 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
2989 clear_bit(Faulty, &rdev->flags);
2990 err = add_bound_rdev(rdev);
2991 }
2992 } else
2993 err = -EBUSY;
2994 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
2995 set_bit(ExternalBbl, &rdev->flags);
2996 rdev->badblocks.shift = 0;
2997 err = 0;
2998 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
2999 clear_bit(ExternalBbl, &rdev->flags);
3000 err = 0;
3001 }
3002 if (!err)
3003 sysfs_notify_dirent_safe(rdev->sysfs_state);
3004 return err ? err : len;
3005 }
3006 static struct rdev_sysfs_entry rdev_state =
3007 __ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
3008
3009 static ssize_t
3010 errors_show(struct md_rdev *rdev, char *page)
3011 {
3012 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
3013 }
3014
3015 static ssize_t
3016 errors_store(struct md_rdev *rdev, const char *buf, size_t len)
3017 {
3018 unsigned int n;
3019 int rv;
3020
3021 rv = kstrtouint(buf, 10, &n);
3022 if (rv < 0)
3023 return rv;
3024 atomic_set(&rdev->corrected_errors, n);
3025 return len;
3026 }
3027 static struct rdev_sysfs_entry rdev_errors =
3028 __ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
3029
3030 static ssize_t
3031 slot_show(struct md_rdev *rdev, char *page)
3032 {
3033 if (test_bit(Journal, &rdev->flags))
3034 return sprintf(page, "journal\n");
3035 else if (rdev->raid_disk < 0)
3036 return sprintf(page, "none\n");
3037 else
3038 return sprintf(page, "%d\n", rdev->raid_disk);
3039 }
3040
3041 static ssize_t
3042 slot_store(struct md_rdev *rdev, const char *buf, size_t len)
3043 {
3044 int slot;
3045 int err;
3046
3047 if (test_bit(Journal, &rdev->flags))
3048 return -EBUSY;
3049 if (strncmp(buf, "none", 4)==0)
3050 slot = -1;
3051 else {
3052 err = kstrtouint(buf, 10, (unsigned int *)&slot);
3053 if (err < 0)
3054 return err;
3055 }
3056 if (rdev->mddev->pers && slot == -1) {
3057
3058
3059
3060
3061
3062
3063
3064 if (rdev->raid_disk == -1)
3065 return -EEXIST;
3066
3067 if (rdev->mddev->pers->hot_remove_disk == NULL)
3068 return -EINVAL;
3069 clear_bit(Blocked, &rdev->flags);
3070 remove_and_add_spares(rdev->mddev, rdev);
3071 if (rdev->raid_disk >= 0)
3072 return -EBUSY;
3073 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3074 md_wakeup_thread(rdev->mddev->thread);
3075 } else if (rdev->mddev->pers) {
3076
3077
3078
3079 int err;
3080
3081 if (rdev->raid_disk != -1)
3082 return -EBUSY;
3083
3084 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3085 return -EBUSY;
3086
3087 if (rdev->mddev->pers->hot_add_disk == NULL)
3088 return -EINVAL;
3089
3090 if (slot >= rdev->mddev->raid_disks &&
3091 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3092 return -ENOSPC;
3093
3094 rdev->raid_disk = slot;
3095 if (test_bit(In_sync, &rdev->flags))
3096 rdev->saved_raid_disk = slot;
3097 else
3098 rdev->saved_raid_disk = -1;
3099 clear_bit(In_sync, &rdev->flags);
3100 clear_bit(Bitmap_sync, &rdev->flags);
3101 err = rdev->mddev->pers->
3102 hot_add_disk(rdev->mddev, rdev);
3103 if (err) {
3104 rdev->raid_disk = -1;
3105 return err;
3106 } else
3107 sysfs_notify_dirent_safe(rdev->sysfs_state);
3108 if (sysfs_link_rdev(rdev->mddev, rdev))
3109 ;
3110
3111 } else {
3112 if (slot >= rdev->mddev->raid_disks &&
3113 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3114 return -ENOSPC;
3115 rdev->raid_disk = slot;
3116
3117 clear_bit(Faulty, &rdev->flags);
3118 clear_bit(WriteMostly, &rdev->flags);
3119 set_bit(In_sync, &rdev->flags);
3120 sysfs_notify_dirent_safe(rdev->sysfs_state);
3121 }
3122 return len;
3123 }
3124
3125 static struct rdev_sysfs_entry rdev_slot =
3126 __ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
3127
3128 static ssize_t
3129 offset_show(struct md_rdev *rdev, char *page)
3130 {
3131 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3132 }
3133
3134 static ssize_t
3135 offset_store(struct md_rdev *rdev, const char *buf, size_t len)
3136 {
3137 unsigned long long offset;
3138 if (kstrtoull(buf, 10, &offset) < 0)
3139 return -EINVAL;
3140 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3141 return -EBUSY;
3142 if (rdev->sectors && rdev->mddev->external)
3143
3144
3145 return -EBUSY;
3146 rdev->data_offset = offset;
3147 rdev->new_data_offset = offset;
3148 return len;
3149 }
3150
3151 static struct rdev_sysfs_entry rdev_offset =
3152 __ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
3153
3154 static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
3155 {
3156 return sprintf(page, "%llu\n",
3157 (unsigned long long)rdev->new_data_offset);
3158 }
3159
3160 static ssize_t new_offset_store(struct md_rdev *rdev,
3161 const char *buf, size_t len)
3162 {
3163 unsigned long long new_offset;
3164 struct mddev *mddev = rdev->mddev;
3165
3166 if (kstrtoull(buf, 10, &new_offset) < 0)
3167 return -EINVAL;
3168
3169 if (mddev->sync_thread ||
3170 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3171 return -EBUSY;
3172 if (new_offset == rdev->data_offset)
3173
3174 ;
3175 else if (new_offset > rdev->data_offset) {
3176
3177 if (new_offset - rdev->data_offset
3178 + mddev->dev_sectors > rdev->sectors)
3179 return -E2BIG;
3180 }
3181
3182
3183
3184
3185
3186 if (new_offset < rdev->data_offset &&
3187 mddev->reshape_backwards)
3188 return -EINVAL;
3189
3190
3191
3192
3193 if (new_offset > rdev->data_offset &&
3194 !mddev->reshape_backwards)
3195 return -EINVAL;
3196
3197 if (mddev->pers && mddev->persistent &&
3198 !super_types[mddev->major_version]
3199 .allow_new_offset(rdev, new_offset))
3200 return -E2BIG;
3201 rdev->new_data_offset = new_offset;
3202 if (new_offset > rdev->data_offset)
3203 mddev->reshape_backwards = 1;
3204 else if (new_offset < rdev->data_offset)
3205 mddev->reshape_backwards = 0;
3206
3207 return len;
3208 }
3209 static struct rdev_sysfs_entry rdev_new_offset =
3210 __ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
3211
3212 static ssize_t
3213 rdev_size_show(struct md_rdev *rdev, char *page)
3214 {
3215 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3216 }
3217
3218 static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
3219 {
3220
3221 if (s1+l1 <= s2)
3222 return 0;
3223 if (s2+l2 <= s1)
3224 return 0;
3225 return 1;
3226 }
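overlaps() treats each (start, length) pair as the half-open range [s, s+l); two ranges conflict only when neither ends at or before the other begins. Worked values (hypothetical):

	/*
	 * overlaps(100, 50, 150, 50) == 0    [100,150) and [150,200) only touch
	 * overlaps(100, 60, 150, 50) == 1    [100,160) and [150,200) share 150..159
	 */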
3227
3228 static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
3229 {
3230 unsigned long long blocks;
3231 sector_t new;
3232
3233 if (kstrtoull(buf, 10, &blocks) < 0)
3234 return -EINVAL;
3235
3236 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
3237 return -EINVAL;
3238
3239 new = blocks * 2;
3240 if (new != blocks * 2)
3241 return -EINVAL;
3242
3243 *sectors = new;
3244 return 0;
3245 }
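The size attribute is written in units of 1 KiB blocks, so strict_blocks_to_sectors() doubles the parsed value to get 512-byte sectors and rejects inputs whose top bit is set or whose doubled value no longer fits in sector_t. With a hypothetical value:

	/*
	 * "1048576" (1 GiB expressed in 1 KiB blocks)  ->  *sectors == 2097152
	 */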
3246
3247 static ssize_t
3248 rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3249 {
3250 struct mddev *my_mddev = rdev->mddev;
3251 sector_t oldsectors = rdev->sectors;
3252 sector_t sectors;
3253
3254 if (test_bit(Journal, &rdev->flags))
3255 return -EBUSY;
3256 if (strict_blocks_to_sectors(buf, &sectors) < 0)
3257 return -EINVAL;
3258 if (rdev->data_offset != rdev->new_data_offset)
3259 return -EINVAL;
3260 if (my_mddev->pers && rdev->raid_disk >= 0) {
3261 if (my_mddev->persistent) {
3262 sectors = super_types[my_mddev->major_version].
3263 rdev_size_change(rdev, sectors);
3264 if (!sectors)
3265 return -EBUSY;
3266 } else if (!sectors)
3267 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3268 rdev->data_offset;
3269 if (!my_mddev->pers->resize)
3270
3271 return -EINVAL;
3272 }
3273 if (sectors < my_mddev->dev_sectors)
3274 return -EINVAL;
3275
3276 rdev->sectors = sectors;
3277 if (sectors > oldsectors && my_mddev->external) {
3278
3279
3280
3281
3282
3283
3284 struct mddev *mddev;
3285 int overlap = 0;
3286 struct list_head *tmp;
3287
3288 rcu_read_lock();
3289 for_each_mddev(mddev, tmp) {
3290 struct md_rdev *rdev2;
3291
3292 rdev_for_each(rdev2, mddev)
3293 if (rdev->bdev == rdev2->bdev &&
3294 rdev != rdev2 &&
3295 overlaps(rdev->data_offset, rdev->sectors,
3296 rdev2->data_offset,
3297 rdev2->sectors)) {
3298 overlap = 1;
3299 break;
3300 }
3301 if (overlap) {
3302 mddev_put(mddev);
3303 break;
3304 }
3305 }
3306 rcu_read_unlock();
3307 if (overlap) {
3308
3309
3310
3311
3312
3313
3314 rdev->sectors = oldsectors;
3315 return -EBUSY;
3316 }
3317 }
3318 return len;
3319 }
3320
3321 static struct rdev_sysfs_entry rdev_size =
3322 __ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3323
3324 static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3325 {
3326 unsigned long long recovery_start = rdev->recovery_offset;
3327
3328 if (test_bit(In_sync, &rdev->flags) ||
3329 recovery_start == MaxSector)
3330 return sprintf(page, "none\n");
3331
3332 return sprintf(page, "%llu\n", recovery_start);
3333 }
3334
3335 static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3336 {
3337 unsigned long long recovery_start;
3338
3339 if (cmd_match(buf, "none"))
3340 recovery_start = MaxSector;
3341 else if (kstrtoull(buf, 10, &recovery_start))
3342 return -EINVAL;
3343
3344 if (rdev->mddev->pers &&
3345 rdev->raid_disk >= 0)
3346 return -EBUSY;
3347
3348 rdev->recovery_offset = recovery_start;
3349 if (recovery_start == MaxSector)
3350 set_bit(In_sync, &rdev->flags);
3351 else
3352 clear_bit(In_sync, &rdev->flags);
3353 return len;
3354 }
3355
3356 static struct rdev_sysfs_entry rdev_recovery_start =
3357 __ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370 static ssize_t bb_show(struct md_rdev *rdev, char *page)
3371 {
3372 return badblocks_show(&rdev->badblocks, page, 0);
3373 }
3374 static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3375 {
3376 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3377
3378 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3379 wake_up(&rdev->blocked_wait);
3380 return rv;
3381 }
3382 static struct rdev_sysfs_entry rdev_bad_blocks =
3383 __ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3384
3385 static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3386 {
3387 return badblocks_show(&rdev->badblocks, page, 1);
3388 }
3389 static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3390 {
3391 return badblocks_store(&rdev->badblocks, page, len, 1);
3392 }
3393 static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3394 __ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3395
3396 static ssize_t
3397 ppl_sector_show(struct md_rdev *rdev, char *page)
3398 {
3399 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3400 }
3401
3402 static ssize_t
3403 ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3404 {
3405 unsigned long long sector;
3406
3407 if (kstrtoull(buf, 10, &sector) < 0)
3408 return -EINVAL;
3409 if (sector != (sector_t)sector)
3410 return -EINVAL;
3411
3412 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3413 rdev->raid_disk >= 0)
3414 return -EBUSY;
3415
3416 if (rdev->mddev->persistent) {
3417 if (rdev->mddev->major_version == 0)
3418 return -EINVAL;
3419 if ((sector > rdev->sb_start &&
3420 sector - rdev->sb_start > S16_MAX) ||
3421 (sector < rdev->sb_start &&
3422 rdev->sb_start - sector > -S16_MIN))
3423 return -EINVAL;
3424 rdev->ppl.offset = sector - rdev->sb_start;
3425 } else if (!rdev->mddev->external) {
3426 return -EBUSY;
3427 }
3428 rdev->ppl.sector = sector;
3429 return len;
3430 }
3431
3432 static struct rdev_sysfs_entry rdev_ppl_sector =
3433 __ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3434
3435 static ssize_t
3436 ppl_size_show(struct md_rdev *rdev, char *page)
3437 {
3438 return sprintf(page, "%u\n", rdev->ppl.size);
3439 }
3440
3441 static ssize_t
3442 ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3443 {
3444 unsigned int size;
3445
3446 if (kstrtouint(buf, 10, &size) < 0)
3447 return -EINVAL;
3448
3449 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3450 rdev->raid_disk >= 0)
3451 return -EBUSY;
3452
3453 if (rdev->mddev->persistent) {
3454 if (rdev->mddev->major_version == 0)
3455 return -EINVAL;
3456 if (size > U16_MAX)
3457 return -EINVAL;
3458 } else if (!rdev->mddev->external) {
3459 return -EBUSY;
3460 }
3461 rdev->ppl.size = size;
3462 return len;
3463 }
3464
3465 static struct rdev_sysfs_entry rdev_ppl_size =
3466 __ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3467
3468 static struct attribute *rdev_default_attrs[] = {
3469 &rdev_state.attr,
3470 &rdev_errors.attr,
3471 &rdev_slot.attr,
3472 &rdev_offset.attr,
3473 &rdev_new_offset.attr,
3474 &rdev_size.attr,
3475 &rdev_recovery_start.attr,
3476 &rdev_bad_blocks.attr,
3477 &rdev_unack_bad_blocks.attr,
3478 &rdev_ppl_sector.attr,
3479 &rdev_ppl_size.attr,
3480 NULL,
3481 };
3482 static ssize_t
3483 rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3484 {
3485 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3486 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3487
3488 if (!entry->show)
3489 return -EIO;
3490 if (!rdev->mddev)
3491 return -ENODEV;
3492 return entry->show(rdev, page);
3493 }
3494
3495 static ssize_t
3496 rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3497 const char *page, size_t length)
3498 {
3499 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3500 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3501 ssize_t rv;
3502 struct mddev *mddev = rdev->mddev;
3503
3504 if (!entry->store)
3505 return -EIO;
3506 if (!capable(CAP_SYS_ADMIN))
3507 return -EACCES;
3508 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3509 if (!rv) {
3510 if (rdev->mddev == NULL)
3511 rv = -ENODEV;
3512 else
3513 rv = entry->store(rdev, page, length);
3514 mddev_unlock(mddev);
3515 }
3516 return rv;
3517 }
3518
3519 static void rdev_free(struct kobject *ko)
3520 {
3521 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3522 kfree(rdev);
3523 }
3524 static const struct sysfs_ops rdev_sysfs_ops = {
3525 .show = rdev_attr_show,
3526 .store = rdev_attr_store,
3527 };
3528 static struct kobj_type rdev_ktype = {
3529 .release = rdev_free,
3530 .sysfs_ops = &rdev_sysfs_ops,
3531 .default_attrs = rdev_default_attrs,
3532 };
3533
3534 int md_rdev_init(struct md_rdev *rdev)
3535 {
3536 rdev->desc_nr = -1;
3537 rdev->saved_raid_disk = -1;
3538 rdev->raid_disk = -1;
3539 rdev->flags = 0;
3540 rdev->data_offset = 0;
3541 rdev->new_data_offset = 0;
3542 rdev->sb_events = 0;
3543 rdev->last_read_error = 0;
3544 rdev->sb_loaded = 0;
3545 rdev->bb_page = NULL;
3546 atomic_set(&rdev->nr_pending, 0);
3547 atomic_set(&rdev->read_errors, 0);
3548 atomic_set(&rdev->corrected_errors, 0);
3549
3550 INIT_LIST_HEAD(&rdev->same_set);
3551 init_waitqueue_head(&rdev->blocked_wait);
3552
3553
3554
3555
3556
3557 return badblocks_init(&rdev->badblocks, 0);
3558 }
3559 EXPORT_SYMBOL_GPL(md_rdev_init);
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570 static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3571 {
3572 char b[BDEVNAME_SIZE];
3573 int err;
3574 struct md_rdev *rdev;
3575 sector_t size;
3576
3577 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3578 if (!rdev)
3579 return ERR_PTR(-ENOMEM);
3580
3581 err = md_rdev_init(rdev);
3582 if (err)
3583 goto abort_free;
3584 err = alloc_disk_sb(rdev);
3585 if (err)
3586 goto abort_free;
3587
3588 err = lock_rdev(rdev, newdev, super_format == -2);
3589 if (err)
3590 goto abort_free;
3591
3592 kobject_init(&rdev->kobj, &rdev_ktype);
3593
3594 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3595 if (!size) {
3596 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3597 bdevname(rdev->bdev,b));
3598 err = -EINVAL;
3599 goto abort_free;
3600 }
3601
3602 if (super_format >= 0) {
3603 err = super_types[super_format].
3604 load_super(rdev, NULL, super_minor);
3605 if (err == -EINVAL) {
3606 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3607 bdevname(rdev->bdev,b),
3608 super_format, super_minor);
3609 goto abort_free;
3610 }
3611 if (err < 0) {
3612 pr_warn("md: could not read %s's sb, not importing!\n",
3613 bdevname(rdev->bdev,b));
3614 goto abort_free;
3615 }
3616 }
3617
3618 return rdev;
3619
3620 abort_free:
3621 if (rdev->bdev)
3622 unlock_rdev(rdev);
3623 md_rdev_clear(rdev);
3624 kfree(rdev);
3625 return ERR_PTR(err);
3626 }
3627
3628
3629
3630
3631
3632 static int analyze_sbs(struct mddev *mddev)
3633 {
3634 int i;
3635 struct md_rdev *rdev, *freshest, *tmp;
3636 char b[BDEVNAME_SIZE];
3637
3638 freshest = NULL;
3639 rdev_for_each_safe(rdev, tmp, mddev)
3640 switch (super_types[mddev->major_version].
3641 load_super(rdev, freshest, mddev->minor_version)) {
3642 case 1:
3643 freshest = rdev;
3644 break;
3645 case 0:
3646 break;
3647 default:
3648 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3649 bdevname(rdev->bdev,b));
3650 md_kick_rdev_from_array(rdev);
3651 }
3652
3653
3654 if (!freshest) {
3655 pr_warn("md: cannot find a valid disk\n");
3656 return -EINVAL;
3657 }
3658
3659 super_types[mddev->major_version].
3660 validate_super(mddev, freshest);
3661
3662 i = 0;
3663 rdev_for_each_safe(rdev, tmp, mddev) {
3664 if (mddev->max_disks &&
3665 (rdev->desc_nr >= mddev->max_disks ||
3666 i > mddev->max_disks)) {
3667 pr_warn("md: %s: %s: only %d devices permitted\n",
3668 mdname(mddev), bdevname(rdev->bdev, b),
3669 mddev->max_disks);
3670 md_kick_rdev_from_array(rdev);
3671 continue;
3672 }
3673 if (rdev != freshest) {
3674 if (super_types[mddev->major_version].
3675 validate_super(mddev, rdev)) {
3676 pr_warn("md: kicking non-fresh %s from array!\n",
3677 bdevname(rdev->bdev,b));
3678 md_kick_rdev_from_array(rdev);
3679 continue;
3680 }
3681 }
3682 if (mddev->level == LEVEL_MULTIPATH) {
3683 rdev->desc_nr = i++;
3684 rdev->raid_disk = rdev->desc_nr;
3685 set_bit(In_sync, &rdev->flags);
3686 } else if (rdev->raid_disk >=
3687 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3688 !test_bit(Journal, &rdev->flags)) {
3689 rdev->raid_disk = -1;
3690 clear_bit(In_sync, &rdev->flags);
3691 }
3692 }
3693
3694 return 0;
3695 }
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707 int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3708 {
3709 unsigned long result = 0;
3710 long decimals = -1;
3711 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3712 if (*cp == '.')
3713 decimals = 0;
3714 else if (decimals < scale) {
3715 unsigned int value;
3716 value = *cp - '0';
3717 result = result * 10 + value;
3718 if (decimals >= 0)
3719 decimals++;
3720 }
3721 cp++;
3722 }
3723 if (*cp == '\n')
3724 cp++;
3725 if (*cp)
3726 return -EINVAL;
3727 if (decimals < 0)
3728 decimals = 0;
3729 *res = result * int_pow(10, scale - decimals);
3730 return 0;
3731 }
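strict_strtoul_scaled() parses a decimal string into a fixed-point integer with 'scale' implied fractional digits; safe_delay_store() below calls it with scale == 3 to turn seconds into milliseconds. Worked examples (values are illustrative):

	/*
	 * strict_strtoul_scaled("0.2", &res, 3)      ->  res == 200
	 * strict_strtoul_scaled("1.5", &res, 3)      ->  res == 1500
	 * strict_strtoul_scaled("20", &res, 3)       ->  res == 20000
	 * strict_strtoul_scaled("12.3456", &res, 3)  ->  res == 12345   (extra digits dropped)
	 */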
3732
3733 static ssize_t
3734 safe_delay_show(struct mddev *mddev, char *page)
3735 {
3736 int msec = (mddev->safemode_delay*1000)/HZ;
3737 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3738 }
3739 static ssize_t
3740 safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3741 {
3742 unsigned long msec;
3743
3744 if (mddev_is_clustered(mddev)) {
3745 pr_warn("md: Safemode is disabled for clustered mode\n");
3746 return -EINVAL;
3747 }
3748
3749 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3750 return -EINVAL;
3751 if (msec == 0)
3752 mddev->safemode_delay = 0;
3753 else {
3754 unsigned long old_delay = mddev->safemode_delay;
3755 unsigned long new_delay = (msec*HZ)/1000;
3756
3757 if (new_delay == 0)
3758 new_delay = 1;
3759 mddev->safemode_delay = new_delay;
3760 if (new_delay < old_delay || old_delay == 0)
3761 mod_timer(&mddev->safemode_timer, jiffies+1);
3762 }
3763 return len;
3764 }
3765 static struct md_sysfs_entry md_safe_delay =
3766 __ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3767
3768 static ssize_t
3769 level_show(struct mddev *mddev, char *page)
3770 {
3771 struct md_personality *p;
3772 int ret;
3773 spin_lock(&mddev->lock);
3774 p = mddev->pers;
3775 if (p)
3776 ret = sprintf(page, "%s\n", p->name);
3777 else if (mddev->clevel[0])
3778 ret = sprintf(page, "%s\n", mddev->clevel);
3779 else if (mddev->level != LEVEL_NONE)
3780 ret = sprintf(page, "%d\n", mddev->level);
3781 else
3782 ret = 0;
3783 spin_unlock(&mddev->lock);
3784 return ret;
3785 }
3786
3787 static ssize_t
3788 level_store(struct mddev *mddev, const char *buf, size_t len)
3789 {
3790 char clevel[16];
3791 ssize_t rv;
3792 size_t slen = len;
3793 struct md_personality *pers, *oldpers;
3794 long level;
3795 void *priv, *oldpriv;
3796 struct md_rdev *rdev;
3797
3798 if (slen == 0 || slen >= sizeof(clevel))
3799 return -EINVAL;
3800
3801 rv = mddev_lock(mddev);
3802 if (rv)
3803 return rv;
3804
3805 if (mddev->pers == NULL) {
3806 strncpy(mddev->clevel, buf, slen);
3807 if (mddev->clevel[slen-1] == '\n')
3808 slen--;
3809 mddev->clevel[slen] = 0;
3810 mddev->level = LEVEL_NONE;
3811 rv = len;
3812 goto out_unlock;
3813 }
3814 rv = -EROFS;
3815 if (mddev->ro)
3816 goto out_unlock;
3817
3818
3819
3820
3821
3822
3823
3824 rv = -EBUSY;
3825 if (mddev->sync_thread ||
3826 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3827 mddev->reshape_position != MaxSector ||
3828 mddev->sysfs_active)
3829 goto out_unlock;
3830
3831 rv = -EINVAL;
3832 if (!mddev->pers->quiesce) {
3833 pr_warn("md: %s: %s does not support online personality change\n",
3834 mdname(mddev), mddev->pers->name);
3835 goto out_unlock;
3836 }
3837
3838
3839 strncpy(clevel, buf, slen);
3840 if (clevel[slen-1] == '\n')
3841 slen--;
3842 clevel[slen] = 0;
3843 if (kstrtol(clevel, 10, &level))
3844 level = LEVEL_NONE;
3845
3846 if (request_module("md-%s", clevel) != 0)
3847 request_module("md-level-%s", clevel);
3848 spin_lock(&pers_lock);
3849 pers = find_pers(level, clevel);
3850 if (!pers || !try_module_get(pers->owner)) {
3851 spin_unlock(&pers_lock);
3852 pr_warn("md: personality %s not loaded\n", clevel);
3853 rv = -EINVAL;
3854 goto out_unlock;
3855 }
3856 spin_unlock(&pers_lock);
3857
3858 if (pers == mddev->pers) {
3859
3860 module_put(pers->owner);
3861 rv = len;
3862 goto out_unlock;
3863 }
3864 if (!pers->takeover) {
3865 module_put(pers->owner);
3866 pr_warn("md: %s: %s does not support personality takeover\n",
3867 mdname(mddev), clevel);
3868 rv = -EINVAL;
3869 goto out_unlock;
3870 }
3871
3872 rdev_for_each(rdev, mddev)
3873 rdev->new_raid_disk = rdev->raid_disk;
3874
3875
3876
3877
3878 priv = pers->takeover(mddev);
3879 if (IS_ERR(priv)) {
3880 mddev->new_level = mddev->level;
3881 mddev->new_layout = mddev->layout;
3882 mddev->new_chunk_sectors = mddev->chunk_sectors;
3883 mddev->raid_disks -= mddev->delta_disks;
3884 mddev->delta_disks = 0;
3885 mddev->reshape_backwards = 0;
3886 module_put(pers->owner);
3887 pr_warn("md: %s: %s would not accept array\n",
3888 mdname(mddev), clevel);
3889 rv = PTR_ERR(priv);
3890 goto out_unlock;
3891 }
3892
3893
3894 mddev_suspend(mddev);
3895 mddev_detach(mddev);
3896
3897 spin_lock(&mddev->lock);
3898 oldpers = mddev->pers;
3899 oldpriv = mddev->private;
3900 mddev->pers = pers;
3901 mddev->private = priv;
3902 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3903 mddev->level = mddev->new_level;
3904 mddev->layout = mddev->new_layout;
3905 mddev->chunk_sectors = mddev->new_chunk_sectors;
3906 mddev->delta_disks = 0;
3907 mddev->reshape_backwards = 0;
3908 mddev->degraded = 0;
3909 spin_unlock(&mddev->lock);
3910
3911 if (oldpers->sync_request == NULL &&
3912 mddev->external) {
3913
3914
3915
3916
3917
3918
3919
3920 mddev->in_sync = 0;
3921 mddev->safemode_delay = 0;
3922 mddev->safemode = 0;
3923 }
3924
3925 oldpers->free(mddev, oldpriv);
3926
3927 if (oldpers->sync_request == NULL &&
3928 pers->sync_request != NULL) {
3929
3930 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3931 pr_warn("md: cannot register extra attributes for %s\n",
3932 mdname(mddev));
3933 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3934 }
3935 if (oldpers->sync_request != NULL &&
3936 pers->sync_request == NULL) {
3937
3938 if (mddev->to_remove == NULL)
3939 mddev->to_remove = &md_redundancy_group;
3940 }
3941
3942 module_put(oldpers->owner);
3943
3944 rdev_for_each(rdev, mddev) {
3945 if (rdev->raid_disk < 0)
3946 continue;
3947 if (rdev->new_raid_disk >= mddev->raid_disks)
3948 rdev->new_raid_disk = -1;
3949 if (rdev->new_raid_disk == rdev->raid_disk)
3950 continue;
3951 sysfs_unlink_rdev(mddev, rdev);
3952 }
3953 rdev_for_each(rdev, mddev) {
3954 if (rdev->raid_disk < 0)
3955 continue;
3956 if (rdev->new_raid_disk == rdev->raid_disk)
3957 continue;
3958 rdev->raid_disk = rdev->new_raid_disk;
3959 if (rdev->raid_disk < 0)
3960 clear_bit(In_sync, &rdev->flags);
3961 else {
3962 if (sysfs_link_rdev(mddev, rdev))
3963 pr_warn("md: cannot register rd%d for %s after level change\n",
3964 rdev->raid_disk, mdname(mddev));
3965 }
3966 }
3967
3968 if (pers->sync_request == NULL) {
3969
3970
3971
3972 mddev->in_sync = 1;
3973 del_timer_sync(&mddev->safemode_timer);
3974 }
3975 blk_set_stacking_limits(&mddev->queue->limits);
3976 pers->run(mddev);
3977 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3978 mddev_resume(mddev);
3979 if (!mddev->thread)
3980 md_update_sb(mddev, 1);
3981 sysfs_notify(&mddev->kobj, NULL, "level");
3982 md_new_event(mddev);
3983 rv = len;
3984 out_unlock:
3985 mddev_unlock(mddev);
3986 return rv;
3987 }
3988
3989 static struct md_sysfs_entry md_level =
3990 __ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3991
3992 static ssize_t
3993 layout_show(struct mddev *mddev, char *page)
3994 {
3995
3996 if (mddev->reshape_position != MaxSector &&
3997 mddev->layout != mddev->new_layout)
3998 return sprintf(page, "%d (%d)\n",
3999 mddev->new_layout, mddev->layout);
4000 return sprintf(page, "%d\n", mddev->layout);
4001 }
4002
4003 static ssize_t
4004 layout_store(struct mddev *mddev, const char *buf, size_t len)
4005 {
4006 unsigned int n;
4007 int err;
4008
4009 err = kstrtouint(buf, 10, &n);
4010 if (err < 0)
4011 return err;
4012 err = mddev_lock(mddev);
4013 if (err)
4014 return err;
4015
4016 if (mddev->pers) {
4017 if (mddev->pers->check_reshape == NULL)
4018 err = -EBUSY;
4019 else if (mddev->ro)
4020 err = -EROFS;
4021 else {
4022 mddev->new_layout = n;
4023 err = mddev->pers->check_reshape(mddev);
4024 if (err)
4025 mddev->new_layout = mddev->layout;
4026 }
4027 } else {
4028 mddev->new_layout = n;
4029 if (mddev->reshape_position == MaxSector)
4030 mddev->layout = n;
4031 }
4032 mddev_unlock(mddev);
4033 return err ?: len;
4034 }
4035 static struct md_sysfs_entry md_layout =
4036 __ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
4037
4038 static ssize_t
4039 raid_disks_show(struct mddev *mddev, char *page)
4040 {
4041 if (mddev->raid_disks == 0)
4042 return 0;
4043 if (mddev->reshape_position != MaxSector &&
4044 mddev->delta_disks != 0)
4045 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4046 mddev->raid_disks - mddev->delta_disks);
4047 return sprintf(page, "%d\n", mddev->raid_disks);
4048 }
4049
4050 static int update_raid_disks(struct mddev *mddev, int raid_disks);
4051
4052 static ssize_t
4053 raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
4054 {
4055 unsigned int n;
4056 int err;
4057
4058 err = kstrtouint(buf, 10, &n);
4059 if (err < 0)
4060 return err;
4061
4062 err = mddev_lock(mddev);
4063 if (err)
4064 return err;
4065 if (mddev->pers)
4066 err = update_raid_disks(mddev, n);
4067 else if (mddev->reshape_position != MaxSector) {
4068 struct md_rdev *rdev;
4069 int olddisks = mddev->raid_disks - mddev->delta_disks;
4070
4071 err = -EINVAL;
4072 rdev_for_each(rdev, mddev) {
4073 if (olddisks < n &&
4074 rdev->data_offset < rdev->new_data_offset)
4075 goto out_unlock;
4076 if (olddisks > n &&
4077 rdev->data_offset > rdev->new_data_offset)
4078 goto out_unlock;
4079 }
4080 err = 0;
4081 mddev->delta_disks = n - olddisks;
4082 mddev->raid_disks = n;
4083 mddev->reshape_backwards = (mddev->delta_disks < 0);
4084 } else
4085 mddev->raid_disks = n;
4086 out_unlock:
4087 mddev_unlock(mddev);
4088 return err ? err : len;
4089 }
4090 static struct md_sysfs_entry md_raid_disks =
4091 __ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
4092
4093 static ssize_t
4094 chunk_size_show(struct mddev *mddev, char *page)
4095 {
4096 if (mddev->reshape_position != MaxSector &&
4097 mddev->chunk_sectors != mddev->new_chunk_sectors)
4098 return sprintf(page, "%d (%d)\n",
4099 mddev->new_chunk_sectors << 9,
4100 mddev->chunk_sectors << 9);
4101 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4102 }
4103
4104 static ssize_t
4105 chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
4106 {
4107 unsigned long n;
4108 int err;
4109
4110 err = kstrtoul(buf, 10, &n);
4111 if (err < 0)
4112 return err;
4113
4114 err = mddev_lock(mddev);
4115 if (err)
4116 return err;
4117 if (mddev->pers) {
4118 if (mddev->pers->check_reshape == NULL)
4119 err = -EBUSY;
4120 else if (mddev->ro)
4121 err = -EROFS;
4122 else {
4123 mddev->new_chunk_sectors = n >> 9;
4124 err = mddev->pers->check_reshape(mddev);
4125 if (err)
4126 mddev->new_chunk_sectors = mddev->chunk_sectors;
4127 }
4128 } else {
4129 mddev->new_chunk_sectors = n >> 9;
4130 if (mddev->reshape_position == MaxSector)
4131 mddev->chunk_sectors = n >> 9;
4132 }
4133 mddev_unlock(mddev);
4134 return err ?: len;
4135 }
4136 static struct md_sysfs_entry md_chunk_size =
4137 __ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
4138
4139 static ssize_t
4140 resync_start_show(struct mddev *mddev, char *page)
4141 {
4142 if (mddev->recovery_cp == MaxSector)
4143 return sprintf(page, "none\n");
4144 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4145 }
4146
4147 static ssize_t
4148 resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4149 {
4150 unsigned long long n;
4151 int err;
4152
4153 if (cmd_match(buf, "none"))
4154 n = MaxSector;
4155 else {
4156 err = kstrtoull(buf, 10, &n);
4157 if (err < 0)
4158 return err;
4159 if (n != (sector_t)n)
4160 return -EINVAL;
4161 }
4162
4163 err = mddev_lock(mddev);
4164 if (err)
4165 return err;
4166 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4167 err = -EBUSY;
4168
4169 if (!err) {
4170 mddev->recovery_cp = n;
4171 if (mddev->pers)
4172 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4173 }
4174 mddev_unlock(mddev);
4175 return err ?: len;
4176 }
4177 static struct md_sysfs_entry md_resync_start =
4178 __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4179 resync_start_show, resync_start_store);
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222 enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4223 write_pending, active_idle, broken, bad_word};
4224 static char *array_states[] = {
4225 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4226 "write-pending", "active-idle", "broken", NULL };
4227
4228 static int match_word(const char *word, char **list)
4229 {
4230 int n;
4231 for (n=0; list[n]; n++)
4232 if (cmd_match(word, list[n]))
4233 break;
4234 return n;
4235 }
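match_word() returns the index of the first entry that cmd_match() accepts, or the index just past the end of the NULL-terminated list when nothing matches; with array_states[] above that out-of-range index maps onto bad_word. For example:

	/*
	 * match_word("clean\n", array_states) == 5    ->  clean
	 * match_word("bogus\n", array_states) == 10   ->  bad_word (no match)
	 */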
4236
4237 static ssize_t
4238 array_state_show(struct mddev *mddev, char *page)
4239 {
4240 enum array_state st = inactive;
4241
4242 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4243 switch(mddev->ro) {
4244 case 1:
4245 st = readonly;
4246 break;
4247 case 2:
4248 st = read_auto;
4249 break;
4250 case 0:
4251 spin_lock(&mddev->lock);
4252 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4253 st = write_pending;
4254 else if (mddev->in_sync)
4255 st = clean;
4256 else if (mddev->safemode)
4257 st = active_idle;
4258 else
4259 st = active;
4260 spin_unlock(&mddev->lock);
4261 }
4262
4263 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4264 st = broken;
4265 } else {
4266 if (list_empty(&mddev->disks) &&
4267 mddev->raid_disks == 0 &&
4268 mddev->dev_sectors == 0)
4269 st = clear;
4270 else
4271 st = inactive;
4272 }
4273 return sprintf(page, "%s\n", array_states[st]);
4274 }
4275
4276 static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
4277 static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
4278 static int do_md_run(struct mddev *mddev);
4279 static int restart_array(struct mddev *mddev);
4280
4281 static ssize_t
4282 array_state_store(struct mddev *mddev, const char *buf, size_t len)
4283 {
4284 int err = 0;
4285 enum array_state st = match_word(buf, array_states);
4286
4287 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
4288
4289
4290
4291 spin_lock(&mddev->lock);
4292 if (st == active) {
4293 restart_array(mddev);
4294 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4295 md_wakeup_thread(mddev->thread);
4296 wake_up(&mddev->sb_wait);
4297 } else {
4298 restart_array(mddev);
4299 if (!set_in_sync(mddev))
4300 err = -EBUSY;
4301 }
4302 if (!err)
4303 sysfs_notify_dirent_safe(mddev->sysfs_state);
4304 spin_unlock(&mddev->lock);
4305 return err ?: len;
4306 }
4307 err = mddev_lock(mddev);
4308 if (err)
4309 return err;
4310 err = -EINVAL;
4311 switch(st) {
4312 case bad_word:
4313 break;
4314 case clear:
4315
4316 err = do_md_stop(mddev, 0, NULL);
4317 break;
4318 case inactive:
4319
4320 if (mddev->pers)
4321 err = do_md_stop(mddev, 2, NULL);
4322 else
4323 err = 0;
4324 break;
4325 case suspended:
4326 break;
4327 case readonly:
4328 if (mddev->pers)
4329 err = md_set_readonly(mddev, NULL);
4330 else {
4331 mddev->ro = 1;
4332 set_disk_ro(mddev->gendisk, 1);
4333 err = do_md_run(mddev);
4334 }
4335 break;
4336 case read_auto:
4337 if (mddev->pers) {
4338 if (mddev->ro == 0)
4339 err = md_set_readonly(mddev, NULL);
4340 else if (mddev->ro == 1)
4341 err = restart_array(mddev);
4342 if (err == 0) {
4343 mddev->ro = 2;
4344 set_disk_ro(mddev->gendisk, 0);
4345 }
4346 } else {
4347 mddev->ro = 2;
4348 err = do_md_run(mddev);
4349 }
4350 break;
4351 case clean:
4352 if (mddev->pers) {
4353 err = restart_array(mddev);
4354 if (err)
4355 break;
4356 spin_lock(&mddev->lock);
4357 if (!set_in_sync(mddev))
4358 err = -EBUSY;
4359 spin_unlock(&mddev->lock);
4360 } else
4361 err = -EINVAL;
4362 break;
4363 case active:
4364 if (mddev->pers) {
4365 err = restart_array(mddev);
4366 if (err)
4367 break;
4368 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4369 wake_up(&mddev->sb_wait);
4370 err = 0;
4371 } else {
4372 mddev->ro = 0;
4373 set_disk_ro(mddev->gendisk, 0);
4374 err = do_md_run(mddev);
4375 }
4376 break;
4377 case write_pending:
4378 case active_idle:
4379 case broken:
4380 /* these cannot be set directly */
4381 break;
4382 }
4383
4384 if (!err) {
4385 if (mddev->hold_active == UNTIL_IOCTL)
4386 mddev->hold_active = 0;
4387 sysfs_notify_dirent_safe(mddev->sysfs_state);
4388 }
4389 mddev_unlock(mddev);
4390 return err ?: len;
4391 }
4392 static struct md_sysfs_entry md_array_state =
4393 __ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4394
4395 static ssize_t
4396 max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4397 return sprintf(page, "%d\n",
4398 atomic_read(&mddev->max_corr_read_errors));
4399 }
4400
4401 static ssize_t
4402 max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4403 {
4404 unsigned int n;
4405 int rv;
4406
4407 rv = kstrtouint(buf, 10, &n);
4408 if (rv < 0)
4409 return rv;
4410 atomic_set(&mddev->max_corr_read_errors, n);
4411 return len;
4412 }
4413
4414 static struct md_sysfs_entry max_corr_read_errors =
4415 __ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4416 max_corrected_read_errors_store);
4417
4418 static ssize_t
4419 null_show(struct mddev *mddev, char *page)
4420 {
4421 return -EINVAL;
4422 }
4423
4424 static ssize_t
4425 new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4426 {
4427 /* buf must be %d:%d\n? giving major and minor numbers */
4428 /* The new device is added to the array.
4429  * If the array has a persistent superblock, we read the
4430  * superblock to initialise info and check validity.
4431  * Otherwise, only checking done is that in bind_rdev_to_array,
4432  * which mainly checks size.
4433  */
4434 char *e;
4435 int major = simple_strtoul(buf, &e, 10);
4436 int minor;
4437 dev_t dev;
4438 struct md_rdev *rdev;
4439 int err;
4440
4441 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4442 return -EINVAL;
4443 minor = simple_strtoul(e+1, &e, 10);
4444 if (*e && *e != '\n')
4445 return -EINVAL;
4446 dev = MKDEV(major, minor);
4447 if (major != MAJOR(dev) ||
4448 minor != MINOR(dev))
4449 return -EOVERFLOW;
4450
4451 flush_workqueue(md_misc_wq);
4452
4453 err = mddev_lock(mddev);
4454 if (err)
4455 return err;
4456 if (mddev->persistent) {
4457 rdev = md_import_device(dev, mddev->major_version,
4458 mddev->minor_version);
4459 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4460 struct md_rdev *rdev0
4461 = list_entry(mddev->disks.next,
4462 struct md_rdev, same_set);
4463 err = super_types[mddev->major_version]
4464 .load_super(rdev, rdev0, mddev->minor_version);
4465 if (err < 0)
4466 goto out;
4467 }
4468 } else if (mddev->external)
4469 rdev = md_import_device(dev, -2, -1);
4470 else
4471 rdev = md_import_device(dev, -1, -1);
4472
4473 if (IS_ERR(rdev)) {
4474 mddev_unlock(mddev);
4475 return PTR_ERR(rdev);
4476 }
4477 err = bind_rdev_to_array(rdev, mddev);
4478 out:
4479 if (err)
4480 export_rdev(rdev);
4481 mddev_unlock(mddev);
4482 if (!err)
4483 md_new_event(mddev);
4484 return err ? err : len;
4485 }
4486
4487 static struct md_sysfs_entry md_new_device =
4488 __ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4489
4490 static ssize_t
4491 bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4492 {
4493 char *end;
4494 unsigned long chunk, end_chunk;
4495 int err;
4496
4497 err = mddev_lock(mddev);
4498 if (err)
4499 return err;
4500 if (!mddev->bitmap)
4501 goto out;
4502
4503 while (*buf) {
4504 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4505 if (buf == end) break;
4506 if (*end == '-') {
4507 buf = end + 1;
4508 end_chunk = simple_strtoul(buf, &end, 0);
4509 if (buf == end) break;
4510 }
4511 if (*end && !isspace(*end)) break;
4512 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4513 buf = skip_spaces(end);
4514 }
4515 md_bitmap_unplug(mddev->bitmap);
4516 out:
4517 mddev_unlock(mddev);
4518 return len;
4519 }
4520
4521 static struct md_sysfs_entry md_bitmap =
4522 __ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4523
4524 static ssize_t
4525 size_show(struct mddev *mddev, char *page)
4526 {
4527 return sprintf(page, "%llu\n",
4528 (unsigned long long)mddev->dev_sectors / 2);
4529 }
4530
4531 static int update_size(struct mddev *mddev, sector_t num_sectors);
4532
4533 static ssize_t
4534 size_store(struct mddev *mddev, const char *buf, size_t len)
4535 {
4536 /* If array is inactive, we can reduce the component size, but
4537  * not increase it (except from 0).
4538  * If array is active, we can try an on-line resize
4539  */
4540 sector_t sectors;
4541 int err = strict_blocks_to_sectors(buf, &sectors);
4542
4543 if (err < 0)
4544 return err;
4545 err = mddev_lock(mddev);
4546 if (err)
4547 return err;
4548 if (mddev->pers) {
4549 err = update_size(mddev, sectors);
4550 if (err == 0)
4551 md_update_sb(mddev, 1);
4552 } else {
4553 if (mddev->dev_sectors == 0 ||
4554 mddev->dev_sectors > sectors)
4555 mddev->dev_sectors = sectors;
4556 else
4557 err = -ENOSPC;
4558 }
4559 mddev_unlock(mddev);
4560 return err ? err : len;
4561 }
4562
4563 static struct md_sysfs_entry md_size =
4564 __ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4565
4566 /* Metadata version:
4567  * This is one of
4568  *   'none' for arrays with no metadata (good luck)
4569  *   'external' for arrays with externally managed metadata,
4570  * or N.M for internally known formats
4571  */
4572 static ssize_t
4573 metadata_show(struct mddev *mddev, char *page)
4574 {
4575 if (mddev->persistent)
4576 return sprintf(page, "%d.%d\n",
4577 mddev->major_version, mddev->minor_version);
4578 else if (mddev->external)
4579 return sprintf(page, "external:%s\n", mddev->metadata_type);
4580 else
4581 return sprintf(page, "none\n");
4582 }
4583
4584 static ssize_t
4585 metadata_store(struct mddev *mddev, const char *buf, size_t len)
4586 {
4587 int major, minor;
4588 char *e;
4589 int err;
4590
4591 /* Changing the details of 'external' metadata is
4592  * always permitted.  Otherwise there must be
4593  * no devices attached to the array.
4594  */
4595 err = mddev_lock(mddev);
4596 if (err)
4597 return err;
4598 err = -EBUSY;
4599 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4600 ;
4601 else if (!list_empty(&mddev->disks))
4602 goto out_unlock;
4603
4604 err = 0;
4605 if (cmd_match(buf, "none")) {
4606 mddev->persistent = 0;
4607 mddev->external = 0;
4608 mddev->major_version = 0;
4609 mddev->minor_version = 90;
4610 goto out_unlock;
4611 }
4612 if (strncmp(buf, "external:", 9) == 0) {
4613 size_t namelen = len-9;
4614 if (namelen >= sizeof(mddev->metadata_type))
4615 namelen = sizeof(mddev->metadata_type)-1;
4616 strncpy(mddev->metadata_type, buf+9, namelen);
4617 mddev->metadata_type[namelen] = 0;
4618 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4619 mddev->metadata_type[--namelen] = 0;
4620 mddev->persistent = 0;
4621 mddev->external = 1;
4622 mddev->major_version = 0;
4623 mddev->minor_version = 90;
4624 goto out_unlock;
4625 }
4626 major = simple_strtoul(buf, &e, 10);
4627 err = -EINVAL;
4628 if (e==buf || *e != '.')
4629 goto out_unlock;
4630 buf = e+1;
4631 minor = simple_strtoul(buf, &e, 10);
4632 if (e==buf || (*e && *e != '\n') )
4633 goto out_unlock;
4634 err = -ENOENT;
4635 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4636 goto out_unlock;
4637 mddev->major_version = major;
4638 mddev->minor_version = minor;
4639 mddev->persistent = 1;
4640 mddev->external = 0;
4641 err = 0;
4642 out_unlock:
4643 mddev_unlock(mddev);
4644 return err ?: len;
4645 }
4646
4647 static struct md_sysfs_entry md_metadata =
4648 __ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4649
4650 static ssize_t
4651 action_show(struct mddev *mddev, char *page)
4652 {
4653 char *type = "idle";
4654 unsigned long recovery = mddev->recovery;
4655 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4656 type = "frozen";
4657 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4658 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4659 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4660 type = "reshape";
4661 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4662 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4663 type = "resync";
4664 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4665 type = "check";
4666 else
4667 type = "repair";
4668 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4669 type = "recover";
4670 else if (mddev->reshape_position != MaxSector)
4671 type = "reshape";
4672 }
4673 return sprintf(page, "%s\n", type);
4674 }
4675
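/*
 * sync_action accepts: idle, frozen, resync, recover, check, repair and
 * reshape; e.g. "echo check > /sys/block/mdX/md/sync_action" starts a scrub.
 */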
4676 static ssize_t
4677 action_store(struct mddev *mddev, const char *page, size_t len)
4678 {
4679 if (!mddev->pers || !mddev->pers->sync_request)
4680 return -EINVAL;
4681
4682
4683 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4684 if (cmd_match(page, "frozen"))
4685 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4686 else
4687 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4688 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4689 mddev_lock(mddev) == 0) {
4690 flush_workqueue(md_misc_wq);
4691 if (mddev->sync_thread) {
4692 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4693 md_reap_sync_thread(mddev);
4694 }
4695 mddev_unlock(mddev);
4696 }
4697 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4698 return -EBUSY;
4699 else if (cmd_match(page, "resync"))
4700 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4701 else if (cmd_match(page, "recover")) {
4702 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4703 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4704 } else if (cmd_match(page, "reshape")) {
4705 int err;
4706 if (mddev->pers->start_reshape == NULL)
4707 return -EINVAL;
4708 err = mddev_lock(mddev);
4709 if (!err) {
4710 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4711 err = -EBUSY;
4712 else {
4713 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4714 err = mddev->pers->start_reshape(mddev);
4715 }
4716 mddev_unlock(mddev);
4717 }
4718 if (err)
4719 return err;
4720 sysfs_notify(&mddev->kobj, NULL, "degraded");
4721 } else {
4722 if (cmd_match(page, "check"))
4723 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4724 else if (!cmd_match(page, "repair"))
4725 return -EINVAL;
4726 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4727 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4728 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4729 }
4730 if (mddev->ro == 2) {
4731 /* A write to sync_action is enough to justify
4732  * canceling read-auto mode
4733  */
4734 mddev->ro = 0;
4735 md_wakeup_thread(mddev->sync_thread);
4736 }
4737 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4738 md_wakeup_thread(mddev->thread);
4739 sysfs_notify_dirent_safe(mddev->sysfs_action);
4740 return len;
4741 }
4742
4743 static struct md_sysfs_entry md_scan_mode =
4744 __ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4745
4746 static ssize_t
4747 last_sync_action_show(struct mddev *mddev, char *page)
4748 {
4749 return sprintf(page, "%s\n", mddev->last_sync_action);
4750 }
4751
4752 static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4753
4754 static ssize_t
4755 mismatch_cnt_show(struct mddev *mddev, char *page)
4756 {
4757 return sprintf(page, "%llu\n",
4758 (unsigned long long)
4759 atomic64_read(&mddev->resync_mismatches));
4760 }
4761
4762 static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4763
4764 static ssize_t
4765 sync_min_show(struct mddev *mddev, char *page)
4766 {
4767 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4768 mddev->sync_speed_min ? "local": "system");
4769 }
4770
4771 static ssize_t
4772 sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4773 {
4774 unsigned int min;
4775 int rv;
4776
4777 if (strncmp(buf, "system", 6)==0) {
4778 min = 0;
4779 } else {
4780 rv = kstrtouint(buf, 10, &min);
4781 if (rv < 0)
4782 return rv;
4783 if (min == 0)
4784 return -EINVAL;
4785 }
4786 mddev->sync_speed_min = min;
4787 return len;
4788 }
4789
4790 static struct md_sysfs_entry md_sync_min =
4791 __ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4792
4793 static ssize_t
4794 sync_max_show(struct mddev *mddev, char *page)
4795 {
4796 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4797 mddev->sync_speed_max ? "local": "system");
4798 }
4799
4800 static ssize_t
4801 sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4802 {
4803 unsigned int max;
4804 int rv;
4805
4806 if (strncmp(buf, "system", 6)==0) {
4807 max = 0;
4808 } else {
4809 rv = kstrtouint(buf, 10, &max);
4810 if (rv < 0)
4811 return rv;
4812 if (max == 0)
4813 return -EINVAL;
4814 }
4815 mddev->sync_speed_max = max;
4816 return len;
4817 }
4818
4819 static struct md_sysfs_entry md_sync_max =
4820 __ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4821
4822 static ssize_t
4823 degraded_show(struct mddev *mddev, char *page)
4824 {
4825 return sprintf(page, "%d\n", mddev->degraded);
4826 }
4827 static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4828
4829 static ssize_t
4830 sync_force_parallel_show(struct mddev *mddev, char *page)
4831 {
4832 return sprintf(page, "%d\n", mddev->parallel_resync);
4833 }
4834
4835 static ssize_t
4836 sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4837 {
4838 long n;
4839
4840 if (kstrtol(buf, 10, &n))
4841 return -EINVAL;
4842
4843 if (n != 0 && n != 1)
4844 return -EINVAL;
4845
4846 mddev->parallel_resync = n;
4847
4848 if (mddev->sync_thread)
4849 wake_up(&resync_wait);
4850
4851 return len;
4852 }
4853
4854
4855 static struct md_sysfs_entry md_sync_force_parallel =
4856 __ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4857 sync_force_parallel_show, sync_force_parallel_store);
4858
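/*
 * sync_speed reports the recent resync/recovery rate in KiB/s: sectors
 * handled since the last rate mark, divided by elapsed seconds and by two.
 */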
4859 static ssize_t
4860 sync_speed_show(struct mddev *mddev, char *page)
4861 {
4862 unsigned long resync, dt, db;
4863 if (mddev->curr_resync == 0)
4864 return sprintf(page, "none\n");
4865 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4866 dt = (jiffies - mddev->resync_mark) / HZ;
4867 if (!dt) dt++;
4868 db = resync - mddev->resync_mark_cnt;
4869 return sprintf(page, "%lu\n", db/dt/2);
4870 }
4871
4872 static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4873
4874 static ssize_t
4875 sync_completed_show(struct mddev *mddev, char *page)
4876 {
4877 unsigned long long max_sectors, resync;
4878
4879 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4880 return sprintf(page, "none\n");
4881
4882 if (mddev->curr_resync == 1 ||
4883 mddev->curr_resync == 2)
4884 return sprintf(page, "delayed\n");
4885
4886 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4887 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4888 max_sectors = mddev->resync_max_sectors;
4889 else
4890 max_sectors = mddev->dev_sectors;
4891
4892 resync = mddev->curr_resync_completed;
4893 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4894 }
4895
4896 static struct md_sysfs_entry md_sync_completed =
4897 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4898
4899 static ssize_t
4900 min_sync_show(struct mddev *mddev, char *page)
4901 {
4902 return sprintf(page, "%llu\n",
4903 (unsigned long long)mddev->resync_min);
4904 }
4905 static ssize_t
4906 min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4907 {
4908 unsigned long long min;
4909 int err;
4910
4911 if (kstrtoull(buf, 10, &min))
4912 return -EINVAL;
4913
4914 spin_lock(&mddev->lock);
4915 err = -EINVAL;
4916 if (min > mddev->resync_max)
4917 goto out_unlock;
4918
4919 err = -EBUSY;
4920 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4921 goto out_unlock;
4922
4923 /* Round down to multiple of 4K for safety */
4924 mddev->resync_min = round_down(min, 8);
4925 err = 0;
4926
4927 out_unlock:
4928 spin_unlock(&mddev->lock);
4929 return err ?: len;
4930 }
4931
4932 static struct md_sysfs_entry md_min_sync =
4933 __ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4934
4935 static ssize_t
4936 max_sync_show(struct mddev *mddev, char *page)
4937 {
4938 if (mddev->resync_max == MaxSector)
4939 return sprintf(page, "max\n");
4940 else
4941 return sprintf(page, "%llu\n",
4942 (unsigned long long)mddev->resync_max);
4943 }
4944 static ssize_t
4945 max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4946 {
4947 int err;
4948 spin_lock(&mddev->lock);
4949 if (strncmp(buf, "max", 3) == 0)
4950 mddev->resync_max = MaxSector;
4951 else {
4952 unsigned long long max;
4953 int chunk;
4954
4955 err = -EINVAL;
4956 if (kstrtoull(buf, 10, &max))
4957 goto out_unlock;
4958 if (max < mddev->resync_min)
4959 goto out_unlock;
4960
4961 err = -EBUSY;
4962 if (max < mddev->resync_max &&
4963 mddev->ro == 0 &&
4964 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4965 goto out_unlock;
4966
4967 /* must be a multiple of chunk_size */
4968 chunk = mddev->chunk_sectors;
4969 if (chunk) {
4970 sector_t temp = max;
4971
4972 err = -EINVAL;
4973 if (sector_div(temp, chunk))
4974 goto out_unlock;
4975 }
4976 mddev->resync_max = max;
4977 }
4978 wake_up(&mddev->recovery_wait);
4979 err = 0;
4980 out_unlock:
4981 spin_unlock(&mddev->lock);
4982 return err ?: len;
4983 }
4984
4985 static struct md_sysfs_entry md_max_sync =
4986 __ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4987
4988 static ssize_t
4989 suspend_lo_show(struct mddev *mddev, char *page)
4990 {
4991 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4992 }
4993
4994 static ssize_t
4995 suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4996 {
4997 unsigned long long new;
4998 int err;
4999
5000 err = kstrtoull(buf, 10, &new);
5001 if (err < 0)
5002 return err;
5003 if (new != (sector_t)new)
5004 return -EINVAL;
5005
5006 err = mddev_lock(mddev);
5007 if (err)
5008 return err;
5009 err = -EINVAL;
5010 if (mddev->pers == NULL ||
5011 mddev->pers->quiesce == NULL)
5012 goto unlock;
5013 mddev_suspend(mddev);
5014 mddev->suspend_lo = new;
5015 mddev_resume(mddev);
5016
5017 err = 0;
5018 unlock:
5019 mddev_unlock(mddev);
5020 return err ?: len;
5021 }
5022 static struct md_sysfs_entry md_suspend_lo =
5023 __ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
5024
5025 static ssize_t
5026 suspend_hi_show(struct mddev *mddev, char *page)
5027 {
5028 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
5029 }
5030
5031 static ssize_t
5032 suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
5033 {
5034 unsigned long long new;
5035 int err;
5036
5037 err = kstrtoull(buf, 10, &new);
5038 if (err < 0)
5039 return err;
5040 if (new != (sector_t)new)
5041 return -EINVAL;
5042
5043 err = mddev_lock(mddev);
5044 if (err)
5045 return err;
5046 err = -EINVAL;
5047 if (mddev->pers == NULL)
5048 goto unlock;
5049
5050 mddev_suspend(mddev);
5051 mddev->suspend_hi = new;
5052 mddev_resume(mddev);
5053
5054 err = 0;
5055 unlock:
5056 mddev_unlock(mddev);
5057 return err ?: len;
5058 }
5059 static struct md_sysfs_entry md_suspend_hi =
5060 __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
5061
5062 static ssize_t
5063 reshape_position_show(struct mddev *mddev, char *page)
5064 {
5065 if (mddev->reshape_position != MaxSector)
5066 return sprintf(page, "%llu\n",
5067 (unsigned long long)mddev->reshape_position);
5068 strcpy(page, "none\n");
5069 return 5;
5070 }
5071
5072 static ssize_t
5073 reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
5074 {
5075 struct md_rdev *rdev;
5076 unsigned long long new;
5077 int err;
5078
5079 err = kstrtoull(buf, 10, &new);
5080 if (err < 0)
5081 return err;
5082 if (new != (sector_t)new)
5083 return -EINVAL;
5084 err = mddev_lock(mddev);
5085 if (err)
5086 return err;
5087 err = -EBUSY;
5088 if (mddev->pers)
5089 goto unlock;
5090 mddev->reshape_position = new;
5091 mddev->delta_disks = 0;
5092 mddev->reshape_backwards = 0;
5093 mddev->new_level = mddev->level;
5094 mddev->new_layout = mddev->layout;
5095 mddev->new_chunk_sectors = mddev->chunk_sectors;
5096 rdev_for_each(rdev, mddev)
5097 rdev->new_data_offset = rdev->data_offset;
5098 err = 0;
5099 unlock:
5100 mddev_unlock(mddev);
5101 return err ?: len;
5102 }
5103
5104 static struct md_sysfs_entry md_reshape_position =
5105 __ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
5106 reshape_position_store);
5107
5108 static ssize_t
5109 reshape_direction_show(struct mddev *mddev, char *page)
5110 {
5111 return sprintf(page, "%s\n",
5112 mddev->reshape_backwards ? "backwards" : "forwards");
5113 }
5114
5115 static ssize_t
5116 reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
5117 {
5118 int backwards = 0;
5119 int err;
5120
5121 if (cmd_match(buf, "forwards"))
5122 backwards = 0;
5123 else if (cmd_match(buf, "backwards"))
5124 backwards = 1;
5125 else
5126 return -EINVAL;
5127 if (mddev->reshape_backwards == backwards)
5128 return len;
5129
5130 err = mddev_lock(mddev);
5131 if (err)
5132 return err;
5133
5134 if (mddev->delta_disks)
5135 err = -EBUSY;
5136 else if (mddev->persistent &&
5137 mddev->major_version == 0)
5138 err = -EINVAL;
5139 else
5140 mddev->reshape_backwards = backwards;
5141 mddev_unlock(mddev);
5142 return err ?: len;
5143 }
5144
5145 static struct md_sysfs_entry md_reshape_direction =
5146 __ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
5147 reshape_direction_store);
5148
5149 static ssize_t
5150 array_size_show(struct mddev *mddev, char *page)
5151 {
5152 if (mddev->external_size)
5153 return sprintf(page, "%llu\n",
5154 (unsigned long long)mddev->array_sectors/2);
5155 else
5156 return sprintf(page, "default\n");
5157 }
5158
5159 static ssize_t
5160 array_size_store(struct mddev *mddev, const char *buf, size_t len)
5161 {
5162 sector_t sectors;
5163 int err;
5164
5165 err = mddev_lock(mddev);
5166 if (err)
5167 return err;
5168
5169 /* cluster raid doesn't support changing array_sectors */
5170 if (mddev_is_clustered(mddev)) {
5171 mddev_unlock(mddev);
5172 return -EINVAL;
5173 }
5174
5175 if (strncmp(buf, "default", 7) == 0) {
5176 if (mddev->pers)
5177 sectors = mddev->pers->size(mddev, 0, 0);
5178 else
5179 sectors = mddev->array_sectors;
5180
5181 mddev->external_size = 0;
5182 } else {
5183 if (strict_blocks_to_sectors(buf, &sectors) < 0)
5184 err = -EINVAL;
5185 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5186 err = -E2BIG;
5187 else
5188 mddev->external_size = 1;
5189 }
5190
5191 if (!err) {
5192 mddev->array_sectors = sectors;
5193 if (mddev->pers) {
5194 set_capacity(mddev->gendisk, mddev->array_sectors);
5195 revalidate_disk(mddev->gendisk);
5196 }
5197 }
5198 mddev_unlock(mddev);
5199 return err ?: len;
5200 }
5201
5202 static struct md_sysfs_entry md_array_size =
5203 __ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
5204 array_size_store);
5205
5206 static ssize_t
5207 consistency_policy_show(struct mddev *mddev, char *page)
5208 {
5209 int ret;
5210
5211 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5212 ret = sprintf(page, "journal\n");
5213 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5214 ret = sprintf(page, "ppl\n");
5215 } else if (mddev->bitmap) {
5216 ret = sprintf(page, "bitmap\n");
5217 } else if (mddev->pers) {
5218 if (mddev->pers->sync_request)
5219 ret = sprintf(page, "resync\n");
5220 else
5221 ret = sprintf(page, "none\n");
5222 } else {
5223 ret = sprintf(page, "unknown\n");
5224 }
5225
5226 return ret;
5227 }
5228
5229 static ssize_t
5230 consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5231 {
5232 int err = 0;
5233
5234 if (mddev->pers) {
5235 if (mddev->pers->change_consistency_policy)
5236 err = mddev->pers->change_consistency_policy(mddev, buf);
5237 else
5238 err = -EBUSY;
5239 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5240 set_bit(MD_HAS_PPL, &mddev->flags);
5241 } else {
5242 err = -EINVAL;
5243 }
5244
5245 return err ? err : len;
5246 }
5247
5248 static struct md_sysfs_entry md_consistency_policy =
5249 __ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
5250 consistency_policy_store);
5251
5252 static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
5253 {
5254 return sprintf(page, "%d\n", mddev->fail_last_dev);
5255 }
5256
5257 /*
5258  * Setting fail_last_dev to true allows the last device to be forcibly
5259  * removed from RAID1/RAID10.
5260  */
5261 static ssize_t
5262 fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
5263 {
5264 int ret;
5265 bool value;
5266
5267 ret = kstrtobool(buf, &value);
5268 if (ret)
5269 return ret;
5270
5271 if (value != mddev->fail_last_dev)
5272 mddev->fail_last_dev = value;
5273
5274 return len;
5275 }
5276 static struct md_sysfs_entry md_fail_last_dev =
5277 __ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
5278 fail_last_dev_store);
5279
5280 static struct attribute *md_default_attrs[] = {
5281 &md_level.attr,
5282 &md_layout.attr,
5283 &md_raid_disks.attr,
5284 &md_chunk_size.attr,
5285 &md_size.attr,
5286 &md_resync_start.attr,
5287 &md_metadata.attr,
5288 &md_new_device.attr,
5289 &md_safe_delay.attr,
5290 &md_array_state.attr,
5291 &md_reshape_position.attr,
5292 &md_reshape_direction.attr,
5293 &md_array_size.attr,
5294 &max_corr_read_errors.attr,
5295 &md_consistency_policy.attr,
5296 &md_fail_last_dev.attr,
5297 NULL,
5298 };
5299
5300 static struct attribute *md_redundancy_attrs[] = {
5301 &md_scan_mode.attr,
5302 &md_last_scan_mode.attr,
5303 &md_mismatches.attr,
5304 &md_sync_min.attr,
5305 &md_sync_max.attr,
5306 &md_sync_speed.attr,
5307 &md_sync_force_parallel.attr,
5308 &md_sync_completed.attr,
5309 &md_min_sync.attr,
5310 &md_max_sync.attr,
5311 &md_suspend_lo.attr,
5312 &md_suspend_hi.attr,
5313 &md_bitmap.attr,
5314 &md_degraded.attr,
5315 NULL,
5316 };
5317 static struct attribute_group md_redundancy_group = {
5318 .name = NULL,
5319 .attrs = md_redundancy_attrs,
5320 };
5321
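/*
 * sysfs show/store wrappers: take a temporary reference on the mddev (and
 * bail out with -EBUSY if it is already being torn down) so the individual
 * attribute handlers can run without holding all_mddevs_lock.
 */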
5322 static ssize_t
5323 md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5324 {
5325 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5326 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5327 ssize_t rv;
5328
5329 if (!entry->show)
5330 return -EIO;
5331 spin_lock(&all_mddevs_lock);
5332 if (list_empty(&mddev->all_mddevs)) {
5333 spin_unlock(&all_mddevs_lock);
5334 return -EBUSY;
5335 }
5336 mddev_get(mddev);
5337 spin_unlock(&all_mddevs_lock);
5338
5339 rv = entry->show(mddev, page);
5340 mddev_put(mddev);
5341 return rv;
5342 }
5343
5344 static ssize_t
5345 md_attr_store(struct kobject *kobj, struct attribute *attr,
5346 const char *page, size_t length)
5347 {
5348 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5349 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5350 ssize_t rv;
5351
5352 if (!entry->store)
5353 return -EIO;
5354 if (!capable(CAP_SYS_ADMIN))
5355 return -EACCES;
5356 spin_lock(&all_mddevs_lock);
5357 if (list_empty(&mddev->all_mddevs)) {
5358 spin_unlock(&all_mddevs_lock);
5359 return -EBUSY;
5360 }
5361 mddev_get(mddev);
5362 spin_unlock(&all_mddevs_lock);
5363 rv = entry->store(mddev, page, length);
5364 mddev_put(mddev);
5365 return rv;
5366 }
5367
5368 static void md_free(struct kobject *ko)
5369 {
5370 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5371
5372 if (mddev->sysfs_state)
5373 sysfs_put(mddev->sysfs_state);
5374
5375 if (mddev->gendisk)
5376 del_gendisk(mddev->gendisk);
5377 if (mddev->queue)
5378 blk_cleanup_queue(mddev->queue);
5379 if (mddev->gendisk)
5380 put_disk(mddev->gendisk);
5381 percpu_ref_exit(&mddev->writes_pending);
5382
5383 bioset_exit(&mddev->bio_set);
5384 bioset_exit(&mddev->sync_set);
5385 kfree(mddev);
5386 }
5387
5388 static const struct sysfs_ops md_sysfs_ops = {
5389 .show = md_attr_show,
5390 .store = md_attr_store,
5391 };
5392 static struct kobj_type md_ktype = {
5393 .release = md_free,
5394 .sysfs_ops = &md_sysfs_ops,
5395 .default_attrs = md_default_attrs,
5396 };
5397
5398 int mdp_major = 0;
5399
5400 static void mddev_delayed_delete(struct work_struct *ws)
5401 {
5402 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5403
5404 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5405 kobject_del(&mddev->kobj);
5406 kobject_put(&mddev->kobj);
5407 }
5408
5409 static void no_op(struct percpu_ref *r) {}
5410
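/*
 * Set up the writes_pending percpu refcount on first use; the initial
 * reference is dropped straight away so the counter starts at zero and a
 * reference is held only while writes are actually pending.
 */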
5411 int mddev_init_writes_pending(struct mddev *mddev)
5412 {
5413 if (mddev->writes_pending.percpu_count_ptr)
5414 return 0;
5415 if (percpu_ref_init(&mddev->writes_pending, no_op,
5416 PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
5417 return -ENOMEM;
5418
5419 percpu_ref_put(&mddev->writes_pending);
5420 return 0;
5421 }
5422 EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
5423
5424 static int md_alloc(dev_t dev, char *name)
5425 {
5426 /*
5427  * If dev is zero, name is the name of a device to allocate with
5428  * an arbitrary minor number.  It will be "md_???"
5429  * If dev is non-zero it must be a device number with a MAJOR of
5430  * MD_MAJOR or mdp_major.  In this case, if "name" is NULL, then
5431  * the device is being created by opening a node in /dev.
5432  * If "name" is not NULL, the device is being created by
5433  * writing to /sys/module/md_mod/parameters/new_array.
5434  */
5435 static DEFINE_MUTEX(disks_mutex);
5436 struct mddev *mddev = mddev_find(dev);
5437 struct gendisk *disk;
5438 int partitioned;
5439 int shift;
5440 int unit;
5441 int error;
5442
5443 if (!mddev)
5444 return -ENODEV;
5445
5446 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5447 shift = partitioned ? MdpMinorShift : 0;
5448 unit = MINOR(mddev->unit) >> shift;
5449
5450 /* wait for any previous instance of this device to be
5451  * completely removed (mddev_delayed_delete).
5452  */
5453 flush_workqueue(md_misc_wq);
5454
5455 mutex_lock(&disks_mutex);
5456 error = -EEXIST;
5457 if (mddev->gendisk)
5458 goto abort;
5459
5460 if (name && !dev) {
5461 /* Need to ensure that 'name' is not a duplicate.
5462  */
5463 struct mddev *mddev2;
5464 spin_lock(&all_mddevs_lock);
5465
5466 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5467 if (mddev2->gendisk &&
5468 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5469 spin_unlock(&all_mddevs_lock);
5470 goto abort;
5471 }
5472 spin_unlock(&all_mddevs_lock);
5473 }
5474 if (name && dev)
5475 /*
5476  * Creating /dev/mdNNN via "new_array", so adjust hold_active.
5477  */
5478 mddev->hold_active = UNTIL_STOP;
5479
5480 error = -ENOMEM;
5481 mddev->queue = blk_alloc_queue(GFP_KERNEL);
5482 if (!mddev->queue)
5483 goto abort;
5484 mddev->queue->queuedata = mddev;
5485
5486 blk_queue_make_request(mddev->queue, md_make_request);
5487 blk_set_stacking_limits(&mddev->queue->limits);
5488
5489 disk = alloc_disk(1 << shift);
5490 if (!disk) {
5491 blk_cleanup_queue(mddev->queue);
5492 mddev->queue = NULL;
5493 goto abort;
5494 }
5495 disk->major = MAJOR(mddev->unit);
5496 disk->first_minor = unit << shift;
5497 if (name)
5498 strcpy(disk->disk_name, name);
5499 else if (partitioned)
5500 sprintf(disk->disk_name, "md_d%d", unit);
5501 else
5502 sprintf(disk->disk_name, "md%d", unit);
5503 disk->fops = &md_fops;
5504 disk->private_data = mddev;
5505 disk->queue = mddev->queue;
5506 blk_queue_write_cache(mddev->queue, true, true);
5507 /* Allow extended partitions.  This makes the
5508  * 'mdp' device redundant, but we can't really
5509  * remove it now.
5510  */
5511 disk->flags |= GENHD_FL_EXT_DEVT;
5512 mddev->gendisk = disk;
5513 /* As soon as we call add_disk(), another thread could get
5514  * through to md_open, so make sure it doesn't get too far
5515  */
5516 mutex_lock(&mddev->open_mutex);
5517 add_disk(disk);
5518
5519 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5520 if (error) {
5521 /* This isn't possible, but as kobject_init_and_add is marked
5522  * __must_check, we must do something with the result
5523  */
5524 pr_debug("md: cannot register %s/md - name in use\n",
5525 disk->disk_name);
5526 error = 0;
5527 }
5528 if (mddev->kobj.sd &&
5529 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5530 pr_debug("pointless warning\n");
5531 mutex_unlock(&mddev->open_mutex);
5532 abort:
5533 mutex_unlock(&disks_mutex);
5534 if (!error && mddev->kobj.sd) {
5535 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5536 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5537 }
5538 mddev_put(mddev);
5539 return error;
5540 }
5541
5542 static struct kobject *md_probe(dev_t dev, int *part, void *data)
5543 {
5544 if (create_on_open)
5545 md_alloc(dev, NULL);
5546 return NULL;
5547 }
5548
5549 static int add_named_array(const char *val, const struct kernel_param *kp)
5550 {
5551 /*
5552  * val must be "md_*" or "mdNNN".
5553  * For "md_*" we allocate an array with a large free minor number, and
5554  * set the name to val.  val must not already be an active name.
5555  * For "mdNNN" we allocate an array with the minor number NNN
5556  * which must not already be in use.
5557  */
5558 int len = strlen(val);
5559 char buf[DISK_NAME_LEN];
5560 unsigned long devnum;
5561
5562 while (len && val[len-1] == '\n')
5563 len--;
5564 if (len >= DISK_NAME_LEN)
5565 return -E2BIG;
5566 strlcpy(buf, val, len+1);
5567 if (strncmp(buf, "md_", 3) == 0)
5568 return md_alloc(0, buf);
5569 if (strncmp(buf, "md", 2) == 0 &&
5570 isdigit(buf[2]) &&
5571 kstrtoul(buf+2, 10, &devnum) == 0 &&
5572 devnum <= MINORMASK)
5573 return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
5574
5575 return -EINVAL;
5576 }
5577
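/* Safemode timer expired: flag the array so it can be marked clean again. */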
5578 static void md_safemode_timeout(struct timer_list *t)
5579 {
5580 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5581
5582 mddev->safemode = 1;
5583 if (mddev->external)
5584 sysfs_notify_dirent_safe(mddev->sysfs_state);
5585
5586 md_wakeup_thread(mddev->thread);
5587 }
5588
5589 static int start_dirty_degraded;
5590
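/*
 * md_run(): assemble and start the array - analyse superblocks if needed,
 * bind the personality, sanity-check member devices and set up bitmaps and
 * queue limits.
 */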
5591 int md_run(struct mddev *mddev)
5592 {
5593 int err;
5594 struct md_rdev *rdev;
5595 struct md_personality *pers;
5596
5597 if (list_empty(&mddev->disks))
5598 /* cannot run an array with no devices.. */
5599 return -EINVAL;
5600
5601 if (mddev->pers)
5602 return -EBUSY;
5603
5604 if (mddev->sysfs_active)
5605 return -EBUSY;
5606
5607 /*
5608  * Analyze all RAID superblock(s)
5609  */
5610 if (!mddev->raid_disks) {
5611 if (!mddev->persistent)
5612 return -EINVAL;
5613 err = analyze_sbs(mddev);
5614 if (err)
5615 return -EINVAL;
5616 }
5617
5618 if (mddev->level != LEVEL_NONE)
5619 request_module("md-level-%d", mddev->level);
5620 else if (mddev->clevel[0])
5621 request_module("md-%s", mddev->clevel);
5622
5623 /*
5624  * Drop all container device buffers, from now on
5625  * the only valid external interface is through the md
5626  * device.
5627  */
5628 mddev->has_superblocks = false;
5629 rdev_for_each(rdev, mddev) {
5630 if (test_bit(Faulty, &rdev->flags))
5631 continue;
5632 sync_blockdev(rdev->bdev);
5633 invalidate_bdev(rdev->bdev);
5634 if (mddev->ro != 1 &&
5635 (bdev_read_only(rdev->bdev) ||
5636 bdev_read_only(rdev->meta_bdev))) {
5637 mddev->ro = 1;
5638 if (mddev->gendisk)
5639 set_disk_ro(mddev->gendisk, 1);
5640 }
5641
5642 if (rdev->sb_page)
5643 mddev->has_superblocks = true;
5644
5645 /* perform some consistency tests on the device.
5646  * We don't want the data to overlap the metadata,
5647  * Internal Bitmap issues have been handled as well.
5648  */
5649 if (rdev->meta_bdev) {
5650 ;
5651 } else if (rdev->data_offset < rdev->sb_start) {
5652 if (mddev->dev_sectors &&
5653 rdev->data_offset + mddev->dev_sectors
5654 > rdev->sb_start) {
5655 pr_warn("md: %s: data overlaps metadata\n",
5656 mdname(mddev));
5657 return -EINVAL;
5658 }
5659 } else {
5660 if (rdev->sb_start + rdev->sb_size/512
5661 > rdev->data_offset) {
5662 pr_warn("md: %s: metadata overlaps data\n",
5663 mdname(mddev));
5664 return -EINVAL;
5665 }
5666 }
5667 sysfs_notify_dirent_safe(rdev->sysfs_state);
5668 }
5669
5670 if (!bioset_initialized(&mddev->bio_set)) {
5671 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5672 if (err)
5673 return err;
5674 }
5675 if (!bioset_initialized(&mddev->sync_set)) {
5676 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5677 if (err)
5678 return err;
5679 }
5680
5681 spin_lock(&pers_lock);
5682 pers = find_pers(mddev->level, mddev->clevel);
5683 if (!pers || !try_module_get(pers->owner)) {
5684 spin_unlock(&pers_lock);
5685 if (mddev->level != LEVEL_NONE)
5686 pr_warn("md: personality for level %d is not loaded!\n",
5687 mddev->level);
5688 else
5689 pr_warn("md: personality for level %s is not loaded!\n",
5690 mddev->clevel);
5691 err = -EINVAL;
5692 goto abort;
5693 }
5694 spin_unlock(&pers_lock);
5695 if (mddev->level != pers->level) {
5696 mddev->level = pers->level;
5697 mddev->new_level = pers->level;
5698 }
5699 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5700
5701 if (mddev->reshape_position != MaxSector &&
5702 pers->start_reshape == NULL) {
5703 /* This personality cannot handle reshaping... */
5704 module_put(pers->owner);
5705 err = -EINVAL;
5706 goto abort;
5707 }
5708
5709 if (pers->sync_request) {
5710 /* Warn if this is a potentially silly
5711  * configuration.
5712  */
5713 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5714 struct md_rdev *rdev2;
5715 int warned = 0;
5716
5717 rdev_for_each(rdev, mddev)
5718 rdev_for_each(rdev2, mddev) {
5719 if (rdev < rdev2 &&
5720 rdev->bdev->bd_contains ==
5721 rdev2->bdev->bd_contains) {
5722 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5723 mdname(mddev),
5724 bdevname(rdev->bdev,b),
5725 bdevname(rdev2->bdev,b2));
5726 warned = 1;
5727 }
5728 }
5729
5730 if (warned)
5731 pr_warn("True protection against single-disk failure might be compromised.\n");
5732 }
5733
5734 mddev->recovery = 0;
5735 /* may be over-ridden by personality */
5736 mddev->resync_max_sectors = mddev->dev_sectors;
5737
5738 mddev->ok_start_degraded = start_dirty_degraded;
5739
5740 if (start_readonly && mddev->ro == 0)
5741 mddev->ro = 2;
5742
5743 err = pers->run(mddev);
5744 if (err)
5745 pr_warn("md: pers->run() failed ...\n");
5746 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5747 WARN_ONCE(!mddev->external_size,
5748 "%s: default size too small, but 'external_size' not in effect?\n",
5749 __func__);
5750 pr_warn("md: invalid array_size %llu > default size %llu\n",
5751 (unsigned long long)mddev->array_sectors / 2,
5752 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5753 err = -EINVAL;
5754 }
5755 if (err == 0 && pers->sync_request &&
5756 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5757 struct bitmap *bitmap;
5758
5759 bitmap = md_bitmap_create(mddev, -1);
5760 if (IS_ERR(bitmap)) {
5761 err = PTR_ERR(bitmap);
5762 pr_warn("%s: failed to create bitmap (%d)\n",
5763 mdname(mddev), err);
5764 } else
5765 mddev->bitmap = bitmap;
5766
5767 }
5768 if (err)
5769 goto bitmap_abort;
5770
5771 if (mddev->bitmap_info.max_write_behind > 0) {
5772 bool creat_pool = false;
5773
5774 rdev_for_each(rdev, mddev) {
5775 if (test_bit(WriteMostly, &rdev->flags) &&
5776 rdev_init_wb(rdev))
5777 creat_pool = true;
5778 }
5779 if (creat_pool && mddev->wb_info_pool == NULL) {
5780 mddev->wb_info_pool =
5781 mempool_create_kmalloc_pool(NR_WB_INFOS,
5782 sizeof(struct wb_info));
5783 if (!mddev->wb_info_pool) {
5784 err = -ENOMEM;
5785 goto bitmap_abort;
5786 }
5787 }
5788 }
5789
5790 if (mddev->queue) {
5791 bool nonrot = true;
5792
5793 rdev_for_each(rdev, mddev) {
5794 if (rdev->raid_disk >= 0 &&
5795 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
5796 nonrot = false;
5797 break;
5798 }
5799 }
5800 if (mddev->degraded)
5801 nonrot = false;
5802 if (nonrot)
5803 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
5804 else
5805 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
5806 mddev->queue->backing_dev_info->congested_data = mddev;
5807 mddev->queue->backing_dev_info->congested_fn = md_congested;
5808 }
5809 if (pers->sync_request) {
5810 if (mddev->kobj.sd &&
5811 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5812 pr_warn("md: cannot register extra attributes for %s\n",
5813 mdname(mddev));
5814 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5815 } else if (mddev->ro == 2)
5816 mddev->ro = 0;
5817
5818 atomic_set(&mddev->max_corr_read_errors,
5819 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5820 mddev->safemode = 0;
5821 if (mddev_is_clustered(mddev))
5822 mddev->safemode_delay = 0;
5823 else
5824 mddev->safemode_delay = (200 * HZ)/1000 +1;
5825 mddev->in_sync = 1;
5826 smp_wmb();
5827 spin_lock(&mddev->lock);
5828 mddev->pers = pers;
5829 spin_unlock(&mddev->lock);
5830 rdev_for_each(rdev, mddev)
5831 if (rdev->raid_disk >= 0)
5832 sysfs_link_rdev(mddev, rdev);
5833
5834 if (mddev->degraded && !mddev->ro)
5835 /* This ensures that recovering status is reported immediately
5836  * via sysfs - until a lack of spares is confirmed.
5837  */
5838 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5839 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5840
5841 if (mddev->sb_flags)
5842 md_update_sb(mddev, 0);
5843
5844 md_new_event(mddev);
5845 return 0;
5846
5847 bitmap_abort:
5848 mddev_detach(mddev);
5849 if (mddev->private)
5850 pers->free(mddev, mddev->private);
5851 mddev->private = NULL;
5852 module_put(pers->owner);
5853 md_bitmap_destroy(mddev);
5854 abort:
5855 bioset_exit(&mddev->bio_set);
5856 bioset_exit(&mddev->sync_set);
5857 return err;
5858 }
5859 EXPORT_SYMBOL_GPL(md_run);
5860
5861 static int do_md_run(struct mddev *mddev)
5862 {
5863 int err;
5864
5865 set_bit(MD_NOT_READY, &mddev->flags);
5866 err = md_run(mddev);
5867 if (err)
5868 goto out;
5869 err = md_bitmap_load(mddev);
5870 if (err) {
5871 md_bitmap_destroy(mddev);
5872 goto out;
5873 }
5874
5875 if (mddev_is_clustered(mddev))
5876 md_allow_write(mddev);
5877
5878
5879 md_start(mddev);
5880
5881 md_wakeup_thread(mddev->thread);
5882 md_wakeup_thread(mddev->sync_thread);
5883
5884 set_capacity(mddev->gendisk, mddev->array_sectors);
5885 revalidate_disk(mddev->gendisk);
5886 clear_bit(MD_NOT_READY, &mddev->flags);
5887 mddev->changed = 1;
5888 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5889 sysfs_notify_dirent_safe(mddev->sysfs_state);
5890 sysfs_notify_dirent_safe(mddev->sysfs_action);
5891 sysfs_notify(&mddev->kobj, NULL, "degraded");
5892 out:
5893 clear_bit(MD_NOT_READY, &mddev->flags);
5894 return err;
5895 }
5896
5897 int md_start(struct mddev *mddev)
5898 {
5899 int ret = 0;
5900
5901 if (mddev->pers->start) {
5902 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
5903 md_wakeup_thread(mddev->thread);
5904 ret = mddev->pers->start(mddev);
5905 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
5906 md_wakeup_thread(mddev->sync_thread);
5907 }
5908 return ret;
5909 }
5910 EXPORT_SYMBOL_GPL(md_start);
5911
5912 static int restart_array(struct mddev *mddev)
5913 {
5914 struct gendisk *disk = mddev->gendisk;
5915 struct md_rdev *rdev;
5916 bool has_journal = false;
5917 bool has_readonly = false;
5918
5919 /* Complain if it has no devices */
5920 if (list_empty(&mddev->disks))
5921 return -ENXIO;
5922 if (!mddev->pers)
5923 return -EINVAL;
5924 if (!mddev->ro)
5925 return -EBUSY;
5926
5927 rcu_read_lock();
5928 rdev_for_each_rcu(rdev, mddev) {
5929 if (test_bit(Journal, &rdev->flags) &&
5930 !test_bit(Faulty, &rdev->flags))
5931 has_journal = true;
5932 if (bdev_read_only(rdev->bdev))
5933 has_readonly = true;
5934 }
5935 rcu_read_unlock();
5936 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
5937 /* Don't restart rw with journal missing/faulty */
5938 return -EINVAL;
5939 if (has_readonly)
5940 return -EROFS;
5941
5942 mddev->safemode = 0;
5943 mddev->ro = 0;
5944 set_disk_ro(disk, 0);
5945 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
5946
5947 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5948 md_wakeup_thread(mddev->thread);
5949 md_wakeup_thread(mddev->sync_thread);
5950 sysfs_notify_dirent_safe(mddev->sysfs_state);
5951 return 0;
5952 }
5953
5954 static void md_clean(struct mddev *mddev)
5955 {
5956 mddev->array_sectors = 0;
5957 mddev->external_size = 0;
5958 mddev->dev_sectors = 0;
5959 mddev->raid_disks = 0;
5960 mddev->recovery_cp = 0;
5961 mddev->resync_min = 0;
5962 mddev->resync_max = MaxSector;
5963 mddev->reshape_position = MaxSector;
5964 mddev->external = 0;
5965 mddev->persistent = 0;
5966 mddev->level = LEVEL_NONE;
5967 mddev->clevel[0] = 0;
5968 mddev->flags = 0;
5969 mddev->sb_flags = 0;
5970 mddev->ro = 0;
5971 mddev->metadata_type[0] = 0;
5972 mddev->chunk_sectors = 0;
5973 mddev->ctime = mddev->utime = 0;
5974 mddev->layout = 0;
5975 mddev->max_disks = 0;
5976 mddev->events = 0;
5977 mddev->can_decrease_events = 0;
5978 mddev->delta_disks = 0;
5979 mddev->reshape_backwards = 0;
5980 mddev->new_level = LEVEL_NONE;
5981 mddev->new_layout = 0;
5982 mddev->new_chunk_sectors = 0;
5983 mddev->curr_resync = 0;
5984 atomic64_set(&mddev->resync_mismatches, 0);
5985 mddev->suspend_lo = mddev->suspend_hi = 0;
5986 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5987 mddev->recovery = 0;
5988 mddev->in_sync = 0;
5989 mddev->changed = 0;
5990 mddev->degraded = 0;
5991 mddev->safemode = 0;
5992 mddev->private = NULL;
5993 mddev->cluster_info = NULL;
5994 mddev->bitmap_info.offset = 0;
5995 mddev->bitmap_info.default_offset = 0;
5996 mddev->bitmap_info.default_space = 0;
5997 mddev->bitmap_info.chunksize = 0;
5998 mddev->bitmap_info.daemon_sleep = 0;
5999 mddev->bitmap_info.max_write_behind = 0;
6000 mddev->bitmap_info.nodes = 0;
6001 }
6002
6003 static void __md_stop_writes(struct mddev *mddev)
6004 {
6005 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6006 flush_workqueue(md_misc_wq);
6007 if (mddev->sync_thread) {
6008 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6009 md_reap_sync_thread(mddev);
6010 }
6011
6012 del_timer_sync(&mddev->safemode_timer);
6013
6014 if (mddev->pers && mddev->pers->quiesce) {
6015 mddev->pers->quiesce(mddev, 1);
6016 mddev->pers->quiesce(mddev, 0);
6017 }
6018 md_bitmap_flush(mddev);
6019
6020 if (mddev->ro == 0 &&
6021 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
6022 mddev->sb_flags)) {
6023 /* mark array as shutdown cleanly */
6024 if (!mddev_is_clustered(mddev))
6025 mddev->in_sync = 1;
6026 md_update_sb(mddev, 1);
6027 }
6028 mempool_destroy(mddev->wb_info_pool);
6029 mddev->wb_info_pool = NULL;
6030 }
6031
6032 void md_stop_writes(struct mddev *mddev)
6033 {
6034 mddev_lock_nointr(mddev);
6035 __md_stop_writes(mddev);
6036 mddev_unlock(mddev);
6037 }
6038 EXPORT_SYMBOL_GPL(md_stop_writes);
6039
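/*
 * Detach the personality from the request path: wait for behind writes,
 * quiesce the personality (unless the array is suspended), stop the main
 * md thread and drain the request queue.
 */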
6040 static void mddev_detach(struct mddev *mddev)
6041 {
6042 md_bitmap_wait_behind_writes(mddev);
6043 if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
6044 mddev->pers->quiesce(mddev, 1);
6045 mddev->pers->quiesce(mddev, 0);
6046 }
6047 md_unregister_thread(&mddev->thread);
6048 if (mddev->queue)
6049 blk_sync_queue(mddev->queue);
6050 }
6051
6052 static void __md_stop(struct mddev *mddev)
6053 {
6054 struct md_personality *pers = mddev->pers;
6055 md_bitmap_destroy(mddev);
6056 mddev_detach(mddev);
6057 /* Ensure ->event_work is done */
6058 flush_workqueue(md_misc_wq);
6059 spin_lock(&mddev->lock);
6060 mddev->pers = NULL;
6061 spin_unlock(&mddev->lock);
6062 pers->free(mddev, mddev->private);
6063 mddev->private = NULL;
6064 if (pers->sync_request && mddev->to_remove == NULL)
6065 mddev->to_remove = &md_redundancy_group;
6066 module_put(pers->owner);
6067 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6068 }
6069
6070 void md_stop(struct mddev *mddev)
6071 {
6072 /* stop the array and free any attached data structures.
6073  * This is called from dm-raid
6074  */
6075 __md_stop(mddev);
6076 bioset_exit(&mddev->bio_set);
6077 bioset_exit(&mddev->sync_set);
6078 }
6079
6080 EXPORT_SYMBOL_GPL(md_stop);
6081
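/*
 * Switch a running array to read-only: interrupt any resync, wait for
 * pending metadata updates, and fail with -EBUSY if the device is still
 * held open elsewhere.
 */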
6082 static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
6083 {
6084 int err = 0;
6085 int did_freeze = 0;
6086
6087 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6088 did_freeze = 1;
6089 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6090 md_wakeup_thread(mddev->thread);
6091 }
6092 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6093 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6094 if (mddev->sync_thread)
6095 /* Thread might be blocked waiting for metadata update
6096  * which will now never happen */
6097 wake_up_process(mddev->sync_thread->tsk);
6098
6099 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6100 return -EBUSY;
6101 mddev_unlock(mddev);
6102 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
6103 &mddev->recovery));
6104 wait_event(mddev->sb_wait,
6105 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6106 mddev_lock_nointr(mddev);
6107
6108 mutex_lock(&mddev->open_mutex);
6109 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6110 mddev->sync_thread ||
6111 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6112 pr_warn("md: %s still in use.\n",mdname(mddev));
6113 if (did_freeze) {
6114 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6115 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6116 md_wakeup_thread(mddev->thread);
6117 }
6118 err = -EBUSY;
6119 goto out;
6120 }
6121 if (mddev->pers) {
6122 __md_stop_writes(mddev);
6123
6124 err = -ENXIO;
6125 if (mddev->ro==1)
6126 goto out;
6127 mddev->ro = 1;
6128 set_disk_ro(mddev->gendisk, 1);
6129 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6130 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6131 md_wakeup_thread(mddev->thread);
6132 sysfs_notify_dirent_safe(mddev->sysfs_state);
6133 err = 0;
6134 }
6135 out:
6136 mutex_unlock(&mddev->open_mutex);
6137 return err;
6138 }
6139
6140 /* mode:
6141  *   0 - completely stop and dis-assemble array
6142  *   2 - stop but do not disassemble array
6143  */
6144 static int do_md_stop(struct mddev *mddev, int mode,
6145 struct block_device *bdev)
6146 {
6147 struct gendisk *disk = mddev->gendisk;
6148 struct md_rdev *rdev;
6149 int did_freeze = 0;
6150
6151 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6152 did_freeze = 1;
6153 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6154 md_wakeup_thread(mddev->thread);
6155 }
6156 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6157 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6158 if (mddev->sync_thread)
6159 /* Thread might be blocked waiting for metadata update
6160  * which will now never happen */
6161 wake_up_process(mddev->sync_thread->tsk);
6162
6163 mddev_unlock(mddev);
6164 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6165 !test_bit(MD_RECOVERY_RUNNING,
6166 &mddev->recovery)));
6167 mddev_lock_nointr(mddev);
6168
6169 mutex_lock(&mddev->open_mutex);
6170 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6171 mddev->sysfs_active ||
6172 mddev->sync_thread ||
6173 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6174 pr_warn("md: %s still in use.\n",mdname(mddev));
6175 mutex_unlock(&mddev->open_mutex);
6176 if (did_freeze) {
6177 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6178 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6179 md_wakeup_thread(mddev->thread);
6180 }
6181 return -EBUSY;
6182 }
6183 if (mddev->pers) {
6184 if (mddev->ro)
6185 set_disk_ro(disk, 0);
6186
6187 __md_stop_writes(mddev);
6188 __md_stop(mddev);
6189 mddev->queue->backing_dev_info->congested_fn = NULL;
6190
6191 /* tell userspace to handle 'inactive' */
6192 sysfs_notify_dirent_safe(mddev->sysfs_state);
6193
6194 rdev_for_each(rdev, mddev)
6195 if (rdev->raid_disk >= 0)
6196 sysfs_unlink_rdev(mddev, rdev);
6197
6198 set_capacity(disk, 0);
6199 mutex_unlock(&mddev->open_mutex);
6200 mddev->changed = 1;
6201 revalidate_disk(disk);
6202
6203 if (mddev->ro)
6204 mddev->ro = 0;
6205 } else
6206 mutex_unlock(&mddev->open_mutex);
6207 /*
6208  * Free resources if final stop
6209  */
6210 if (mode == 0) {
6211 pr_info("md: %s stopped.\n", mdname(mddev));
6212
6213 if (mddev->bitmap_info.file) {
6214 struct file *f = mddev->bitmap_info.file;
6215 spin_lock(&mddev->lock);
6216 mddev->bitmap_info.file = NULL;
6217 spin_unlock(&mddev->lock);
6218 fput(f);
6219 }
6220 mddev->bitmap_info.offset = 0;
6221
6222 export_array(mddev);
6223
6224 md_clean(mddev);
6225 if (mddev->hold_active == UNTIL_STOP)
6226 mddev->hold_active = 0;
6227 }
6228 md_new_event(mddev);
6229 sysfs_notify_dirent_safe(mddev->sysfs_state);
6230 return 0;
6231 }
6232
6233 #ifndef MODULE
6234 static void autorun_array(struct mddev *mddev)
6235 {
6236 struct md_rdev *rdev;
6237 int err;
6238
6239 if (list_empty(&mddev->disks))
6240 return;
6241
6242 pr_info("md: running: ");
6243
6244 rdev_for_each(rdev, mddev) {
6245 char b[BDEVNAME_SIZE];
6246 pr_cont("<%s>", bdevname(rdev->bdev,b));
6247 }
6248 pr_cont("\n");
6249
6250 err = do_md_run(mddev);
6251 if (err) {
6252 pr_warn("md: do_md_run() returned %d\n", err);
6253 do_md_stop(mddev, 0, NULL);
6254 }
6255 }
6256
6257 /*
6258  * lets try to run arrays based on all disks that have arrived
6259  * until now. (those are in pending_raid_disks)
6260  *
6261  * the method: pick the first pending disk, collect all disks with
6262  * the same UUID, remove all from the pending list and put them into
6263  * the 'same_array' list. Then order this list based on superblock
6264  * update time (freshest comes first), kick out 'old' disks and
6265  * compare superblocks. If everything's fine then run it.
6266  *
6267  * If "unit" is allocated, then bump its reference count
6268  */
6269 static void autorun_devices(int part)
6270 {
6271 struct md_rdev *rdev0, *rdev, *tmp;
6272 struct mddev *mddev;
6273 char b[BDEVNAME_SIZE];
6274
6275 pr_info("md: autorun ...\n");
6276 while (!list_empty(&pending_raid_disks)) {
6277 int unit;
6278 dev_t dev;
6279 LIST_HEAD(candidates);
6280 rdev0 = list_entry(pending_raid_disks.next,
6281 struct md_rdev, same_set);
6282
6283 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
6284 INIT_LIST_HEAD(&candidates);
6285 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
6286 if (super_90_load(rdev, rdev0, 0) >= 0) {
6287 pr_debug("md: adding %s ...\n",
6288 bdevname(rdev->bdev,b));
6289 list_move(&rdev->same_set, &candidates);
6290 }
6291 /*
6292  * now we have a set of devices, with all of them having
6293  * mostly sane superblocks. It's time to allocate the
6294  * mddev.
6295  */
6296 if (part) {
6297 dev = MKDEV(mdp_major,
6298 rdev0->preferred_minor << MdpMinorShift);
6299 unit = MINOR(dev) >> MdpMinorShift;
6300 } else {
6301 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6302 unit = MINOR(dev);
6303 }
6304 if (rdev0->preferred_minor != unit) {
6305 pr_warn("md: unit number in %s is bad: %d\n",
6306 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
6307 break;
6308 }
6309
6310 md_probe(dev, NULL, NULL);
6311 mddev = mddev_find(dev);
6312 if (!mddev || !mddev->gendisk) {
6313 if (mddev)
6314 mddev_put(mddev);
6315 break;
6316 }
6317 if (mddev_lock(mddev))
6318 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
6319 else if (mddev->raid_disks || mddev->major_version
6320 || !list_empty(&mddev->disks)) {
6321 pr_warn("md: %s already running, cannot run %s\n",
6322 mdname(mddev), bdevname(rdev0->bdev,b));
6323 mddev_unlock(mddev);
6324 } else {
6325 pr_debug("md: created %s\n", mdname(mddev));
6326 mddev->persistent = 1;
6327 rdev_for_each_list(rdev, tmp, &candidates) {
6328 list_del_init(&rdev->same_set);
6329 if (bind_rdev_to_array(rdev, mddev))
6330 export_rdev(rdev);
6331 }
6332 autorun_array(mddev);
6333 mddev_unlock(mddev);
6334 }
6335
6336 /* on success, candidates will be empty, on error
6337  * it won't... */
6338 rdev_for_each_list(rdev, tmp, &candidates) {
6339 list_del_init(&rdev->same_set);
6340 export_rdev(rdev);
6341 }
6342 mddev_put(mddev);
6343 }
6344 pr_info("md: ... autorun DONE.\n");
6345 }
6346 #endif
6347
6348 static int get_version(void __user *arg)
6349 {
6350 mdu_version_t ver;
6351
6352 ver.major = MD_MAJOR_VERSION;
6353 ver.minor = MD_MINOR_VERSION;
6354 ver.patchlevel = MD_PATCHLEVEL_VERSION;
6355
6356 if (copy_to_user(arg, &ver, sizeof(ver)))
6357 return -EFAULT;
6358
6359 return 0;
6360 }
6361
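/*
 * GET_ARRAY_INFO ioctl: count working/failed/spare members under RCU and
 * copy an mdu_array_info_t describing the array back to userspace.
 */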
6362 static int get_array_info(struct mddev *mddev, void __user *arg)
6363 {
6364 mdu_array_info_t info;
6365 int nr,working,insync,failed,spare;
6366 struct md_rdev *rdev;
6367
6368 nr = working = insync = failed = spare = 0;
6369 rcu_read_lock();
6370 rdev_for_each_rcu(rdev, mddev) {
6371 nr++;
6372 if (test_bit(Faulty, &rdev->flags))
6373 failed++;
6374 else {
6375 working++;
6376 if (test_bit(In_sync, &rdev->flags))
6377 insync++;
6378 else if (test_bit(Journal, &rdev->flags))
6379 /* journal device is counted as working, but neither in_sync nor spare */
6380 ;
6381 else
6382 spare++;
6383 }
6384 }
6385 rcu_read_unlock();
6386
6387 info.major_version = mddev->major_version;
6388 info.minor_version = mddev->minor_version;
6389 info.patch_version = MD_PATCHLEVEL_VERSION;
6390 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6391 info.level = mddev->level;
6392 info.size = mddev->dev_sectors / 2;
6393 if (info.size != mddev->dev_sectors / 2)
6394 info.size = -1;
6395 info.nr_disks = nr;
6396 info.raid_disks = mddev->raid_disks;
6397 info.md_minor = mddev->md_minor;
6398 info.not_persistent= !mddev->persistent;
6399
6400 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6401 info.state = 0;
6402 if (mddev->in_sync)
6403 info.state = (1<<MD_SB_CLEAN);
6404 if (mddev->bitmap && mddev->bitmap_info.offset)
6405 info.state |= (1<<MD_SB_BITMAP_PRESENT);
6406 if (mddev_is_clustered(mddev))
6407 info.state |= (1<<MD_SB_CLUSTERED);
6408 info.active_disks = insync;
6409 info.working_disks = working;
6410 info.failed_disks = failed;
6411 info.spare_disks = spare;
6412
6413 info.layout = mddev->layout;
6414 info.chunk_size = mddev->chunk_sectors << 9;
6415
6416 if (copy_to_user(arg, &info, sizeof(info)))
6417 return -EFAULT;
6418
6419 return 0;
6420 }
6421
6422 static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6423 {
6424 mdu_bitmap_file_t *file = NULL;
6425 char *ptr;
6426 int err;
6427
6428 file = kzalloc(sizeof(*file), GFP_NOIO);
6429 if (!file)
6430 return -ENOMEM;
6431
6432 err = 0;
6433 spin_lock(&mddev->lock);
6434
6435 if (mddev->bitmap_info.file) {
6436 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6437 sizeof(file->pathname));
6438 if (IS_ERR(ptr))
6439 err = PTR_ERR(ptr);
6440 else
6441 memmove(file->pathname, ptr,
6442 sizeof(file->pathname)-(ptr-file->pathname));
6443 }
6444 spin_unlock(&mddev->lock);
6445
6446 if (err == 0 &&
6447 copy_to_user(arg, file, sizeof(*file)))
6448 err = -EFAULT;
6449
6450 kfree(file);
6451 return err;
6452 }
6453
6454 static int get_disk_info(struct mddev *mddev, void __user * arg)
6455 {
6456 mdu_disk_info_t info;
6457 struct md_rdev *rdev;
6458
6459 if (copy_from_user(&info, arg, sizeof(info)))
6460 return -EFAULT;
6461
6462 rcu_read_lock();
6463 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6464 if (rdev) {
6465 info.major = MAJOR(rdev->bdev->bd_dev);
6466 info.minor = MINOR(rdev->bdev->bd_dev);
6467 info.raid_disk = rdev->raid_disk;
6468 info.state = 0;
6469 if (test_bit(Faulty, &rdev->flags))
6470 info.state |= (1<<MD_DISK_FAULTY);
6471 else if (test_bit(In_sync, &rdev->flags)) {
6472 info.state |= (1<<MD_DISK_ACTIVE);
6473 info.state |= (1<<MD_DISK_SYNC);
6474 }
6475 if (test_bit(Journal, &rdev->flags))
6476 info.state |= (1<<MD_DISK_JOURNAL);
6477 if (test_bit(WriteMostly, &rdev->flags))
6478 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6479 if (test_bit(FailFast, &rdev->flags))
6480 info.state |= (1<<MD_DISK_FAILFAST);
6481 } else {
6482 info.major = info.minor = 0;
6483 info.raid_disk = -1;
6484 info.state = (1<<MD_DISK_REMOVED);
6485 }
6486 rcu_read_unlock();
6487
6488 if (copy_to_user(arg, &info, sizeof(info)))
6489 return -EFAULT;
6490
6491 return 0;
6492 }
6493
6494 static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
6495 {
6496 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6497 struct md_rdev *rdev;
6498 dev_t dev = MKDEV(info->major,info->minor);
6499
6500 if (mddev_is_clustered(mddev) &&
6501 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6502 pr_warn("%s: Cannot add to clustered mddev.\n",
6503 mdname(mddev));
6504 return -EINVAL;
6505 }
6506
6507 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6508 return -EOVERFLOW;
6509
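/*
 * If no array configuration has been given yet, we are assembling
 * from on-disk superblocks: load the new device's superblock and
 * cross-check it against the first device already bound.
 */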
6510 if (!mddev->raid_disks) {
6511 int err;
6512
6513 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6514 if (IS_ERR(rdev)) {
6515 pr_warn("md: md_import_device returned %ld\n",
6516 PTR_ERR(rdev));
6517 return PTR_ERR(rdev);
6518 }
6519 if (!list_empty(&mddev->disks)) {
6520 struct md_rdev *rdev0
6521 = list_entry(mddev->disks.next,
6522 struct md_rdev, same_set);
6523 err = super_types[mddev->major_version]
6524 .load_super(rdev, rdev0, mddev->minor_version);
6525 if (err < 0) {
6526 pr_warn("md: %s has different UUID to %s\n",
6527 bdevname(rdev->bdev,b),
6528 bdevname(rdev0->bdev,b2));
6529 export_rdev(rdev);
6530 return -EINVAL;
6531 }
6532 }
6533 err = bind_rdev_to_array(rdev, mddev);
6534 if (err)
6535 export_rdev(rdev);
6536 return err;
6537 }
6538
6539 /*
6540 * The array already has an identity. If it is running, hot-add the
6541 * device through the personality after validating it against the
6542 * existing metadata; otherwise fall through to the version-0 path.
6543 */
6544 if (mddev->pers) {
6545 int err;
6546 if (!mddev->pers->hot_add_disk) {
6547 pr_warn("%s: personality does not support diskops!\n",
6548 mdname(mddev));
6549 return -EINVAL;
6550 }
6551 if (mddev->persistent)
6552 rdev = md_import_device(dev, mddev->major_version,
6553 mddev->minor_version);
6554 else
6555 rdev = md_import_device(dev, -1, -1);
6556 if (IS_ERR(rdev)) {
6557 pr_warn("md: md_import_device returned %ld\n",
6558 PTR_ERR(rdev));
6559 return PTR_ERR(rdev);
6560 }
6561
6562 if (!mddev->persistent) {
6563 if (info->state & (1<<MD_DISK_SYNC) &&
6564 info->raid_disk < mddev->raid_disks) {
6565 rdev->raid_disk = info->raid_disk;
6566 set_bit(In_sync, &rdev->flags);
6567 clear_bit(Bitmap_sync, &rdev->flags);
6568 } else
6569 rdev->raid_disk = -1;
6570 rdev->saved_raid_disk = rdev->raid_disk;
6571 } else
6572 super_types[mddev->major_version].
6573 validate_super(mddev, rdev);
6574 if ((info->state & (1<<MD_DISK_SYNC)) &&
6575 rdev->raid_disk != info->raid_disk) {
6576 /* The caller asked for a specific in-sync slot but superblock
6577 * validation assigned a different one, so reject the request.
6578 */
6579 export_rdev(rdev);
6580 return -EINVAL;
6581 }
6582
6583 clear_bit(In_sync, &rdev->flags);
6584 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6585 set_bit(WriteMostly, &rdev->flags);
6586 else
6587 clear_bit(WriteMostly, &rdev->flags);
6588 if (info->state & (1<<MD_DISK_FAILFAST))
6589 set_bit(FailFast, &rdev->flags);
6590 else
6591 clear_bit(FailFast, &rdev->flags);
6592
6593 if (info->state & (1<<MD_DISK_JOURNAL)) {
6594 struct md_rdev *rdev2;
6595 bool has_journal = false;
6596
6597 /* make sure there is no journal device in the array already */
6598 rdev_for_each(rdev2, mddev) {
6599 if (test_bit(Journal, &rdev2->flags)) {
6600 has_journal = true;
6601 break;
6602 }
6603 }
6604 if (has_journal || mddev->bitmap) {
6605 export_rdev(rdev);
6606 return -EBUSY;
6607 }
6608 set_bit(Journal, &rdev->flags);
6609 }
6610
6611 /* check whether the device shows up in other cluster nodes */
6612
6613 if (mddev_is_clustered(mddev)) {
6614 if (info->state & (1 << MD_DISK_CANDIDATE))
6615 set_bit(Candidate, &rdev->flags);
6616 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6617 /* --add was initiated on this node */
6618 err = md_cluster_ops->add_new_disk(mddev, rdev);
6619 if (err) {
6620 export_rdev(rdev);
6621 return err;
6622 }
6623 }
6624 }
6625
6626 rdev->raid_disk = -1;
6627 err = bind_rdev_to_array(rdev, mddev);
6628
6629 if (err)
6630 export_rdev(rdev);
6631
6632 if (mddev_is_clustered(mddev)) {
6633 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6634 if (!err) {
6635 err = md_cluster_ops->new_disk_ack(mddev,
6636 err == 0);
6637 if (err)
6638 md_kick_rdev_from_array(rdev);
6639 }
6640 } else {
6641 if (err)
6642 md_cluster_ops->add_new_disk_cancel(mddev);
6643 else
6644 err = add_bound_rdev(rdev);
6645 }
6646
6647 } else if (!err)
6648 err = add_bound_rdev(rdev);
6649
6650 return err;
6651 }
6652
6653 /* otherwise, add_new_disk is only allowed
6654 * for major_version==0 superblocks
6655 */
6656 if (mddev->major_version != 0) {
6657 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6658 return -EINVAL;
6659 }
6660
6661 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6662 int err;
6663 rdev = md_import_device(dev, -1, 0);
6664 if (IS_ERR(rdev)) {
6665 pr_warn("md: error, md_import_device() returned %ld\n",
6666 PTR_ERR(rdev));
6667 return PTR_ERR(rdev);
6668 }
6669 rdev->desc_nr = info->number;
6670 if (info->raid_disk < mddev->raid_disks)
6671 rdev->raid_disk = info->raid_disk;
6672 else
6673 rdev->raid_disk = -1;
6674
6675 if (rdev->raid_disk < mddev->raid_disks)
6676 if (info->state & (1<<MD_DISK_SYNC))
6677 set_bit(In_sync, &rdev->flags);
6678
6679 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6680 set_bit(WriteMostly, &rdev->flags);
6681 if (info->state & (1<<MD_DISK_FAILFAST))
6682 set_bit(FailFast, &rdev->flags);
6683
6684 if (!mddev->persistent) {
6685 pr_debug("md: nonpersistent superblock ...\n");
6686 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6687 } else
6688 rdev->sb_start = calc_dev_sboffset(rdev);
6689 rdev->sectors = rdev->sb_start;
6690
6691 err = bind_rdev_to_array(rdev, mddev);
6692 if (err) {
6693 export_rdev(rdev);
6694 return err;
6695 }
6696 }
6697
6698 return 0;
6699 }
6700
6701 static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6702 {
6703 char b[BDEVNAME_SIZE];
6704 struct md_rdev *rdev;
6705
6706 if (!mddev->pers)
6707 return -ENODEV;
6708
6709 rdev = find_rdev(mddev, dev);
6710 if (!rdev)
6711 return -ENXIO;
6712
6713 if (rdev->raid_disk < 0)
6714 goto kick_rdev;
6715
6716 clear_bit(Blocked, &rdev->flags);
6717 remove_and_add_spares(mddev, rdev);
6718
6719 if (rdev->raid_disk >= 0)
6720 goto busy;
6721
6722 kick_rdev:
6723 if (mddev_is_clustered(mddev))
6724 md_cluster_ops->remove_disk(mddev, rdev);
6725
6726 md_kick_rdev_from_array(rdev);
6727 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6728 if (mddev->thread)
6729 md_wakeup_thread(mddev->thread);
6730 else
6731 md_update_sb(mddev, 1);
6732 md_new_event(mddev);
6733
6734 return 0;
6735 busy:
6736 pr_debug("md: cannot remove active disk %s from %s ...\n",
6737 bdevname(rdev->bdev,b), mdname(mddev));
6738 return -EBUSY;
6739 }
6740
6741 static int hot_add_disk(struct mddev *mddev, dev_t dev)
6742 {
6743 char b[BDEVNAME_SIZE];
6744 int err;
6745 struct md_rdev *rdev;
6746
6747 if (!mddev->pers)
6748 return -ENODEV;
6749
6750 if (mddev->major_version != 0) {
6751 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6752 mdname(mddev));
6753 return -EINVAL;
6754 }
6755 if (!mddev->pers->hot_add_disk) {
6756 pr_warn("%s: personality does not support diskops!\n",
6757 mdname(mddev));
6758 return -EINVAL;
6759 }
6760
6761 rdev = md_import_device(dev, -1, 0);
6762 if (IS_ERR(rdev)) {
6763 pr_warn("md: error, md_import_device() returned %ld\n",
6764 PTR_ERR(rdev));
6765 return -EINVAL;
6766 }
6767
6768 if (mddev->persistent)
6769 rdev->sb_start = calc_dev_sboffset(rdev);
6770 else
6771 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6772
6773 rdev->sectors = rdev->sb_start;
6774
6775 if (test_bit(Faulty, &rdev->flags)) {
6776 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
6777 bdevname(rdev->bdev,b), mdname(mddev));
6778 err = -EINVAL;
6779 goto abort_export;
6780 }
6781
6782 clear_bit(In_sync, &rdev->flags);
6783 rdev->desc_nr = -1;
6784 rdev->saved_raid_disk = -1;
6785 err = bind_rdev_to_array(rdev, mddev);
6786 if (err)
6787 goto abort_export;
6788
6789 /*
6790 * The device is bound as a spare (raid_disk == -1); the recovery
6791 * started below decides where it actually ends up.
6792 */
6793
6794 rdev->raid_disk = -1;
6795
6796 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6797 if (!mddev->thread)
6798 md_update_sb(mddev, 1);
6799
6800 /*
6801 * Kick recovery; maybe this spare has to be added to the array immediately.
6802 */
6803 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6804 md_wakeup_thread(mddev->thread);
6805 md_new_event(mddev);
6806 return 0;
6807
6808 abort_export:
6809 export_rdev(rdev);
6810 return err;
6811 }
6812
6813 static int set_bitmap_file(struct mddev *mddev, int fd)
6814 {
6815 int err = 0;
6816
6817 if (mddev->pers) {
6818 if (!mddev->pers->quiesce || !mddev->thread)
6819 return -EBUSY;
6820 if (mddev->recovery || mddev->sync_thread)
6821 return -EBUSY;
6822
6823 }
6824
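/* fd >= 0 attaches a new external bitmap file; fd < 0 detaches it */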
6825 if (fd >= 0) {
6826 struct inode *inode;
6827 struct file *f;
6828
6829 if (mddev->bitmap || mddev->bitmap_info.file)
6830 return -EEXIST;
6831 f = fget(fd);
6832
6833 if (f == NULL) {
6834 pr_warn("%s: error: failed to get bitmap file\n",
6835 mdname(mddev));
6836 return -EBADF;
6837 }
6838
6839 inode = f->f_mapping->host;
6840 if (!S_ISREG(inode->i_mode)) {
6841 pr_warn("%s: error: bitmap file must be a regular file\n",
6842 mdname(mddev));
6843 err = -EBADF;
6844 } else if (!(f->f_mode & FMODE_WRITE)) {
6845 pr_warn("%s: error: bitmap file must be opened for write\n",
6846 mdname(mddev));
6847 err = -EBADF;
6848 } else if (atomic_read(&inode->i_writecount) != 1) {
6849 pr_warn("%s: error: bitmap file is already in use\n",
6850 mdname(mddev));
6851 err = -EBUSY;
6852 }
6853 if (err) {
6854 fput(f);
6855 return err;
6856 }
6857 mddev->bitmap_info.file = f;
6858 mddev->bitmap_info.offset = 0;
6859 } else if (mddev->bitmap == NULL)
6860 return -ENOENT;
6861 err = 0;
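/* if the array is already running, create and load (or destroy) the
 * bitmap now, with the array suspended around the change */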
6862 if (mddev->pers) {
6863 if (fd >= 0) {
6864 struct bitmap *bitmap;
6865
6866 bitmap = md_bitmap_create(mddev, -1);
6867 mddev_suspend(mddev);
6868 if (!IS_ERR(bitmap)) {
6869 mddev->bitmap = bitmap;
6870 err = md_bitmap_load(mddev);
6871 } else
6872 err = PTR_ERR(bitmap);
6873 if (err) {
6874 md_bitmap_destroy(mddev);
6875 fd = -1;
6876 }
6877 mddev_resume(mddev);
6878 } else if (fd < 0) {
6879 mddev_suspend(mddev);
6880 md_bitmap_destroy(mddev);
6881 mddev_resume(mddev);
6882 }
6883 }
6884 if (fd < 0) {
6885 struct file *f = mddev->bitmap_info.file;
6886 if (f) {
6887 spin_lock(&mddev->lock);
6888 mddev->bitmap_info.file = NULL;
6889 spin_unlock(&mddev->lock);
6890 fput(f);
6891 }
6892 }
6893
6894 return err;
6895 }
6896
6897 /*
6898 * set_array_info is used two different ways.
6899 * The original usage is when creating a new array: raid_disks is > 0
6900 * and, together with level, size, not_persistent, layout and
6901 * chunk_size, determines the shape of the array. This always
6902 * creates an array with a version-0.90.0 superblock.
6903 * The newer usage is when assembling an array: in this case
6904 * raid_disks will be 0, and the major_version field is used to
6905 * determine which style of superblock is to be found on the
6906 * devices. The minor and patch version numbers are also kept in
6907 * case the superblock handler wishes to interpret them.
6908 */
6909
6910 static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
6911 {
6912
6913 if (info->raid_disks == 0) {
6914 /* just setting version numbers for superblock loading */
6915 if (info->major_version < 0 ||
6916 info->major_version >= ARRAY_SIZE(super_types) ||
6917 super_types[info->major_version].name == NULL) {
6918
6919 pr_warn("md: superblock version %d not known\n",
6920 info->major_version);
6921 return -EINVAL;
6922 }
6923 mddev->major_version = info->major_version;
6924 mddev->minor_version = info->minor_version;
6925 mddev->patch_version = info->patch_version;
6926 mddev->persistent = !info->not_persistent;
6927 /* ensure mddev_put doesn't delete this now that there
6928 * is some minimal configuration.
6929 */
6930 mddev->ctime = ktime_get_real_seconds();
6931 return 0;
6932 }
6933 mddev->major_version = MD_MAJOR_VERSION;
6934 mddev->minor_version = MD_MINOR_VERSION;
6935 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6936 mddev->ctime = ktime_get_real_seconds();
6937
6938 mddev->level = info->level;
6939 mddev->clevel[0] = 0;
6940 mddev->dev_sectors = 2 * (sector_t)info->size;
6941 mddev->raid_disks = info->raid_disks;
6942 /* don't set md_minor, it is determined by which
6943 * /dev/md* device was opened
6944 */
6945 if (info->state & (1<<MD_SB_CLEAN))
6946 mddev->recovery_cp = MaxSector;
6947 else
6948 mddev->recovery_cp = 0;
6949 mddev->persistent = ! info->not_persistent;
6950 mddev->external = 0;
6951
6952 mddev->layout = info->layout;
6953 if (mddev->level == 0)
6954 /* cannot rely on the layout value for raid0 */
6955 mddev->layout = -1;
6956 mddev->chunk_sectors = info->chunk_size >> 9;
6957
6958 if (mddev->persistent) {
6959 mddev->max_disks = MD_SB_DISKS;
6960 mddev->flags = 0;
6961 mddev->sb_flags = 0;
6962 }
6963 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6964
6965 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6966 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6967 mddev->bitmap_info.offset = 0;
6968
6969 mddev->reshape_position = MaxSector;
6970
6971 /*
6972 * Generate a 128 bit UUID
6973 */
6974 get_random_bytes(mddev->uuid, 16);
6975
6976 mddev->new_level = mddev->level;
6977 mddev->new_chunk_sectors = mddev->chunk_sectors;
6978 mddev->new_layout = mddev->layout;
6979 mddev->delta_disks = 0;
6980 mddev->reshape_backwards = 0;
6981
6982 return 0;
6983 }
6984
6985 void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6986 {
6987 lockdep_assert_held(&mddev->reconfig_mutex);
6988
6989 if (mddev->external_size)
6990 return;
6991
6992 mddev->array_sectors = array_sectors;
6993 }
6994 EXPORT_SYMBOL(md_set_array_sectors);
6995
6996 static int update_size(struct mddev *mddev, sector_t num_sectors)
6997 {
6998 struct md_rdev *rdev;
6999 int rv;
7000 int fit = (num_sectors == 0);
7001 sector_t old_dev_sectors = mddev->dev_sectors;
7002
7003 if (mddev->pers->resize == NULL)
7004 return -EINVAL;
7005
7006 /* "num_sectors" is the number of sectors of each device that is
7007 * used. This only makes sense for arrays with redundancy;
7008 * linear and raid0 always use whatever space is available. We can
7009 * only consider changing it when no resync or reconstruction is
7010 * happening, and the new size must fit on every member device.
7011 * If num_sectors is zero, we find the largest size that fits.
7012 */
7013
7014 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7015 mddev->sync_thread)
7016 return -EBUSY;
7017 if (mddev->ro)
7018 return -EROFS;
7019
7020 rdev_for_each(rdev, mddev) {
7021 sector_t avail = rdev->sectors;
7022
7023 if (fit && (num_sectors == 0 || num_sectors > avail))
7024 num_sectors = avail;
7025 if (avail < num_sectors)
7026 return -ENOSPC;
7027 }
7028 rv = mddev->pers->resize(mddev, num_sectors);
7029 if (!rv) {
7030 if (mddev_is_clustered(mddev))
7031 md_cluster_ops->update_size(mddev, old_dev_sectors);
7032 else if (mddev->queue) {
7033 set_capacity(mddev->gendisk, mddev->array_sectors);
7034 revalidate_disk(mddev->gendisk);
7035 }
7036 }
7037 return rv;
7038 }
7039
7040 static int update_raid_disks(struct mddev *mddev, int raid_disks)
7041 {
7042 int rv;
7043 struct md_rdev *rdev;
7044
7045 if (mddev->pers->check_reshape == NULL)
7046 return -EINVAL;
7047 if (mddev->ro)
7048 return -EROFS;
7049 if (raid_disks <= 0 ||
7050 (mddev->max_disks && raid_disks >= mddev->max_disks))
7051 return -EINVAL;
7052 if (mddev->sync_thread ||
7053 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7054 mddev->reshape_position != MaxSector)
7055 return -EBUSY;
7056
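/* a reshape that adds disks must not move data to a higher offset,
 * and one that removes disks must not move it lower */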
7057 rdev_for_each(rdev, mddev) {
7058 if (mddev->raid_disks < raid_disks &&
7059 rdev->data_offset < rdev->new_data_offset)
7060 return -EINVAL;
7061 if (mddev->raid_disks > raid_disks &&
7062 rdev->data_offset > rdev->new_data_offset)
7063 return -EINVAL;
7064 }
7065
7066 mddev->delta_disks = raid_disks - mddev->raid_disks;
7067 if (mddev->delta_disks < 0)
7068 mddev->reshape_backwards = 1;
7069 else if (mddev->delta_disks > 0)
7070 mddev->reshape_backwards = 0;
7071
7072 rv = mddev->pers->check_reshape(mddev);
7073 if (rv < 0) {
7074 mddev->delta_disks = 0;
7075 mddev->reshape_backwards = 0;
7076 }
7077 return rv;
7078 }
7079
7080 /*
7081 * update_array_info is used to change the configuration of an
7082 * on-line array.
7083 * The version, ctime, level, size, raid_disks, layout and chunk_size
7084 * fields in the info are checked against the array.
7085 * Any differences that cannot be handled will cause an error.
7086 * Normally, only one change can be managed at a time.
7087 */
7088 static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
7089 {
7090 int rv = 0;
7091 int cnt = 0;
7092 int state = 0;
7093
7094 /* calculate expected state, ignoring low bits */
7095 if (mddev->bitmap && mddev->bitmap_info.offset)
7096 state |= (1 << MD_SB_BITMAP_PRESENT);
7097
7098 if (mddev->major_version != info->major_version ||
7099 mddev->minor_version != info->minor_version ||
7100
7101 mddev->ctime != info->ctime ||
7102 mddev->level != info->level ||
7103
7104 mddev->persistent != !info->not_persistent ||
7105 mddev->chunk_sectors != info->chunk_size >> 9 ||
7106 /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
7107 ((state^info->state) & 0xfffffe00)
7108 )
7109 return -EINVAL;
7110 /* Check there is only one change */
7111 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7112 cnt++;
7113 if (mddev->raid_disks != info->raid_disks)
7114 cnt++;
7115 if (mddev->layout != info->layout)
7116 cnt++;
7117 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
7118 cnt++;
7119 if (cnt == 0)
7120 return 0;
7121 if (cnt > 1)
7122 return -EINVAL;
7123
7124 if (mddev->layout != info->layout) {
7125 /* Change layout:
7126 * we don't need to do anything at the md level, the
7127 * personality will take care of it all.
7128 */
7129 if (mddev->pers->check_reshape == NULL)
7130 return -EINVAL;
7131 else {
7132 mddev->new_layout = info->layout;
7133 rv = mddev->pers->check_reshape(mddev);
7134 if (rv)
7135 mddev->new_layout = mddev->layout;
7136 return rv;
7137 }
7138 }
7139 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7140 rv = update_size(mddev, (sector_t)info->size * 2);
7141
7142 if (mddev->raid_disks != info->raid_disks)
7143 rv = update_raid_disks(mddev, info->raid_disks);
7144
7145 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
7146 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7147 rv = -EINVAL;
7148 goto err;
7149 }
7150 if (mddev->recovery || mddev->sync_thread) {
7151 rv = -EBUSY;
7152 goto err;
7153 }
7154 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
7155 struct bitmap *bitmap;
7156
7157 if (mddev->bitmap) {
7158 rv = -EEXIST;
7159 goto err;
7160 }
7161 if (mddev->bitmap_info.default_offset == 0) {
7162 rv = -EINVAL;
7163 goto err;
7164 }
7165 mddev->bitmap_info.offset =
7166 mddev->bitmap_info.default_offset;
7167 mddev->bitmap_info.space =
7168 mddev->bitmap_info.default_space;
7169 bitmap = md_bitmap_create(mddev, -1);
7170 mddev_suspend(mddev);
7171 if (!IS_ERR(bitmap)) {
7172 mddev->bitmap = bitmap;
7173 rv = md_bitmap_load(mddev);
7174 } else
7175 rv = PTR_ERR(bitmap);
7176 if (rv)
7177 md_bitmap_destroy(mddev);
7178 mddev_resume(mddev);
7179 } else {
7180 /* remove the bitmap */
7181 if (!mddev->bitmap) {
7182 rv = -ENOENT;
7183 goto err;
7184 }
7185 if (mddev->bitmap->storage.file) {
7186 rv = -EINVAL;
7187 goto err;
7188 }
7189 if (mddev->bitmap_info.nodes) {
7190 /* take the cluster-wide bitmap locks before switching to none */
7191 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7192 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
7193 rv = -EPERM;
7194 md_cluster_ops->unlock_all_bitmaps(mddev);
7195 goto err;
7196 }
7197
7198 mddev->bitmap_info.nodes = 0;
7199 md_cluster_ops->leave(mddev);
7200 }
7201 mddev_suspend(mddev);
7202 md_bitmap_destroy(mddev);
7203 mddev_resume(mddev);
7204 mddev->bitmap_info.offset = 0;
7205 }
7206 }
7207 md_update_sb(mddev, 1);
7208 return rv;
7209 err:
7210 return rv;
7211 }
7212
7213 static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7214 {
7215 struct md_rdev *rdev;
7216 int err = 0;
7217
7218 if (mddev->pers == NULL)
7219 return -ENODEV;
7220
7221 rcu_read_lock();
7222 rdev = md_find_rdev_rcu(mddev, dev);
7223 if (!rdev)
7224 err = -ENODEV;
7225 else {
7226 md_error(mddev, rdev);
7227 if (!test_bit(Faulty, &rdev->flags))
7228 err = -EBUSY;
7229 }
7230 rcu_read_unlock();
7231 return err;
7232 }
7233
7234 /*
7235 * There is no easy way to give a CHS virtual geometry, so we
7236 * pretend to have 2 heads and 4 sectors per track, with a
7237 * suitably large number of cylinders.
7238 */
7239
7240 static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
7241 {
7242 struct mddev *mddev = bdev->bd_disk->private_data;
7243
7244 geo->heads = 2;
7245 geo->sectors = 4;
7246 geo->cylinders = mddev->array_sectors / 8;
7247 return 0;
7248 }
7249
7250 static inline bool md_ioctl_valid(unsigned int cmd)
7251 {
7252 switch (cmd) {
7253 case ADD_NEW_DISK:
7254 case BLKROSET:
7255 case GET_ARRAY_INFO:
7256 case GET_BITMAP_FILE:
7257 case GET_DISK_INFO:
7258 case HOT_ADD_DISK:
7259 case HOT_REMOVE_DISK:
7260 case RAID_AUTORUN:
7261 case RAID_VERSION:
7262 case RESTART_ARRAY_RW:
7263 case RUN_ARRAY:
7264 case SET_ARRAY_INFO:
7265 case SET_BITMAP_FILE:
7266 case SET_DISK_FAULTY:
7267 case STOP_ARRAY:
7268 case STOP_ARRAY_RO:
7269 case CLUSTERED_DISK_NACK:
7270 return true;
7271 default:
7272 return false;
7273 }
7274 }
7275
7276 static int md_ioctl(struct block_device *bdev, fmode_t mode,
7277 unsigned int cmd, unsigned long arg)
7278 {
7279 int err = 0;
7280 void __user *argp = (void __user *)arg;
7281 struct mddev *mddev = NULL;
7282 int ro;
7283 bool did_set_md_closing = false;
7284
7285 if (!md_ioctl_valid(cmd))
7286 return -ENOTTY;
7287
7288 switch (cmd) {
7289 case RAID_VERSION:
7290 case GET_ARRAY_INFO:
7291 case GET_DISK_INFO:
7292 break;
7293 default:
7294 if (!capable(CAP_SYS_ADMIN))
7295 return -EACCES;
7296 }
7297
7298 /*
7299 * Commands dealing with the RAID driver but not any
7300 * particular array:
7301 */
7302 switch (cmd) {
7303 case RAID_VERSION:
7304 err = get_version(argp);
7305 goto out;
7306
7307 #ifndef MODULE
7308 case RAID_AUTORUN:
7309 err = 0;
7310 autostart_arrays(arg);
7311 goto out;
7312 #endif
7313 default:;
7314 }
7315
7316 /*
7317 * Commands creating/starting a new array:
7318 */
7319
7320 mddev = bdev->bd_disk->private_data;
7321
7322 if (!mddev) {
7323 BUG();
7324 goto out;
7325 }
7326
7327 /* Some actions do not need the reconfig mutex */
7328 switch (cmd) {
7329 case GET_ARRAY_INFO:
7330 if (!mddev->raid_disks && !mddev->external)
7331 err = -ENODEV;
7332 else
7333 err = get_array_info(mddev, argp);
7334 goto out;
7335
7336 case GET_DISK_INFO:
7337 if (!mddev->raid_disks && !mddev->external)
7338 err = -ENODEV;
7339 else
7340 err = get_disk_info(mddev, argp);
7341 goto out;
7342
7343 case SET_DISK_FAULTY:
7344 err = set_disk_faulty(mddev, new_decode_dev(arg));
7345 goto out;
7346
7347 case GET_BITMAP_FILE:
7348 err = get_bitmap_file(mddev, argp);
7349 goto out;
7350
7351 }
7352
7353 if (cmd == ADD_NEW_DISK)
7354 /* need to ensure md_delayed_delete() has completed */
7355 flush_workqueue(md_misc_wq);
7356
7357 if (cmd == HOT_REMOVE_DISK)
7358 /* need to ensure the recovery thread has run */
7359 wait_event_interruptible_timeout(mddev->sb_wait,
7360 !test_bit(MD_RECOVERY_NEEDED,
7361 &mddev->recovery),
7362 msecs_to_jiffies(5000));
7363 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
7364 /* Need to flush the page cache, and ensure no-one else opens
7365 * and writes
7366 */
7367 mutex_lock(&mddev->open_mutex);
7368 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7369 mutex_unlock(&mddev->open_mutex);
7370 err = -EBUSY;
7371 goto out;
7372 }
7373 WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags));
7374 set_bit(MD_CLOSING, &mddev->flags);
7375 did_set_md_closing = true;
7376 mutex_unlock(&mddev->open_mutex);
7377 sync_blockdev(bdev);
7378 }
7379 err = mddev_lock(mddev);
7380 if (err) {
7381 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
7382 err, cmd);
7383 goto out;
7384 }
7385
7386 if (cmd == SET_ARRAY_INFO) {
7387 mdu_array_info_t info;
7388 if (!arg)
7389 memset(&info, 0, sizeof(info));
7390 else if (copy_from_user(&info, argp, sizeof(info))) {
7391 err = -EFAULT;
7392 goto unlock;
7393 }
7394 if (mddev->pers) {
7395 err = update_array_info(mddev, &info);
7396 if (err) {
7397 pr_warn("md: couldn't update array info. %d\n", err);
7398 goto unlock;
7399 }
7400 goto unlock;
7401 }
7402 if (!list_empty(&mddev->disks)) {
7403 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7404 err = -EBUSY;
7405 goto unlock;
7406 }
7407 if (mddev->raid_disks) {
7408 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7409 err = -EBUSY;
7410 goto unlock;
7411 }
7412 err = set_array_info(mddev, &info);
7413 if (err) {
7414 pr_warn("md: couldn't set array info. %d\n", err);
7415 goto unlock;
7416 }
7417 goto unlock;
7418 }
7419
7420 /*
7421 * If the array is not yet initialised, only ADD_NEW_DISK,
7422 * STOP_ARRAY, RUN_ARRAY, SET_BITMAP_FILE and GET_BITMAP_FILE
7423 * are allowed.
7424 */
7425 if ((!mddev->raid_disks && !mddev->external)
7426 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
7427 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
7428 && cmd != GET_BITMAP_FILE) {
7429 err = -ENODEV;
7430 goto unlock;
7431 }
7432
7433 /*
7434 * Commands even a read-only array can execute:
7435 */
7436 switch (cmd) {
7437 case RESTART_ARRAY_RW:
7438 err = restart_array(mddev);
7439 goto unlock;
7440
7441 case STOP_ARRAY:
7442 err = do_md_stop(mddev, 0, bdev);
7443 goto unlock;
7444
7445 case STOP_ARRAY_RO:
7446 err = md_set_readonly(mddev, bdev);
7447 goto unlock;
7448
7449 case HOT_REMOVE_DISK:
7450 err = hot_remove_disk(mddev, new_decode_dev(arg));
7451 goto unlock;
7452
7453 case ADD_NEW_DISK:
7454 /* We can support ADD_NEW_DISK on read-only arrays
7455 * only if we are re-adding a preexisting device.
7456 * So require mddev->pers and MD_DISK_SYNC.
7457 */
7458 if (mddev->pers) {
7459 mdu_disk_info_t info;
7460 if (copy_from_user(&info, argp, sizeof(info)))
7461 err = -EFAULT;
7462 else if (!(info.state & (1<<MD_DISK_SYNC)))
7463 /* Need to clear read-only for this */
7464 break;
7465 else
7466 err = add_new_disk(mddev, &info);
7467 goto unlock;
7468 }
7469 break;
7470
7471 case BLKROSET:
7472 if (get_user(ro, (int __user *)(arg))) {
7473 err = -EFAULT;
7474 goto unlock;
7475 }
7476 err = -EINVAL;
7477
7478 /* if the bdev is going read-only the value of mddev->ro
7479 * does not matter, no writes are coming
7480 */
7481 if (ro)
7482 goto unlock;
7483
7484 /* are we already prepared for writes? */
7485 if (mddev->ro != 1)
7486 goto unlock;
7487
7488 /* transitioning to read-auto need only happen for
7489 * arrays that call md_write_start
7490 */
7491 if (mddev->pers) {
7492 err = restart_array(mddev);
7493 if (err == 0) {
7494 mddev->ro = 2;
7495 set_disk_ro(mddev->gendisk, 0);
7496 }
7497 }
7498 goto unlock;
7499 }
7500
7501 /*
7502 * The remaining ioctls change the state of the superblock,
7503 * so we do not allow them on read-only arrays.
7504 */
7505 if (mddev->ro && mddev->pers) {
7506 if (mddev->ro == 2) {
7507 mddev->ro = 0;
7508 sysfs_notify_dirent_safe(mddev->sysfs_state);
7509 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7510 /* mddev_unlock will wake the thread */
7511 /* If a device failed while we were read-only, we
7512 * need to make sure the metadata is updated now.
7513 */
7514 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7515 mddev_unlock(mddev);
7516 wait_event(mddev->sb_wait,
7517 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7518 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7519 mddev_lock_nointr(mddev);
7520 }
7521 } else {
7522 err = -EROFS;
7523 goto unlock;
7524 }
7525 }
7526
7527 switch (cmd) {
7528 case ADD_NEW_DISK:
7529 {
7530 mdu_disk_info_t info;
7531 if (copy_from_user(&info, argp, sizeof(info)))
7532 err = -EFAULT;
7533 else
7534 err = add_new_disk(mddev, &info);
7535 goto unlock;
7536 }
7537
7538 case CLUSTERED_DISK_NACK:
7539 if (mddev_is_clustered(mddev))
7540 md_cluster_ops->new_disk_ack(mddev, false);
7541 else
7542 err = -EINVAL;
7543 goto unlock;
7544
7545 case HOT_ADD_DISK:
7546 err = hot_add_disk(mddev, new_decode_dev(arg));
7547 goto unlock;
7548
7549 case RUN_ARRAY:
7550 err = do_md_run(mddev);
7551 goto unlock;
7552
7553 case SET_BITMAP_FILE:
7554 err = set_bitmap_file(mddev, (int)arg);
7555 goto unlock;
7556
7557 default:
7558 err = -EINVAL;
7559 goto unlock;
7560 }
7561
7562 unlock:
7563 if (mddev->hold_active == UNTIL_IOCTL &&
7564 err != -EINVAL)
7565 mddev->hold_active = 0;
7566 mddev_unlock(mddev);
7567 out:
7568 if (did_set_md_closing)
7569 clear_bit(MD_CLOSING, &mddev->flags);
7570 return err;
7571 }
7572 #ifdef CONFIG_COMPAT
7573 static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7574 unsigned int cmd, unsigned long arg)
7575 {
7576 switch (cmd) {
7577 case HOT_REMOVE_DISK:
7578 case HOT_ADD_DISK:
7579 case SET_DISK_FAULTY:
7580 case SET_BITMAP_FILE:
7581 /* These take in an integer arg, do not convert */
7582 break;
7583 default:
7584 arg = (unsigned long)compat_ptr(arg);
7585 break;
7586 }
7587
7588 return md_ioctl(bdev, mode, cmd, arg);
7589 }
7590 #endif
7591
7592 static int md_open(struct block_device *bdev, fmode_t mode)
7593 {
7594 /*
7595 * Succeed if we can lock the mddev, which confirms that
7596 * it isn't being stopped right now.
7597 */
7598 struct mddev *mddev = mddev_find(bdev->bd_dev);
7599 int err;
7600
7601 if (!mddev)
7602 return -ENODEV;
7603
7604 if (mddev->gendisk != bdev->bd_disk) {
7605 /* we are racing with mddev_put which is discarding this
7606 * bd_disk.
7607 */
7608 mddev_put(mddev);
7609 /* Wait until bdev->bd_disk is definitely gone */
7610 flush_workqueue(md_misc_wq);
7611 /* Then retry the open from the top */
7612 return -ERESTARTSYS;
7613 }
7614 BUG_ON(mddev != bdev->bd_disk->private_data);
7615
7616 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7617 goto out;
7618
7619 if (test_bit(MD_CLOSING, &mddev->flags)) {
7620 mutex_unlock(&mddev->open_mutex);
7621 err = -ENODEV;
7622 goto out;
7623 }
7624
7625 err = 0;
7626 atomic_inc(&mddev->openers);
7627 mutex_unlock(&mddev->open_mutex);
7628
7629 check_disk_change(bdev);
7630 out:
7631 if (err)
7632 mddev_put(mddev);
7633 return err;
7634 }
7635
7636 static void md_release(struct gendisk *disk, fmode_t mode)
7637 {
7638 struct mddev *mddev = disk->private_data;
7639
7640 BUG_ON(!mddev);
7641 atomic_dec(&mddev->openers);
7642 mddev_put(mddev);
7643 }
7644
7645 static int md_media_changed(struct gendisk *disk)
7646 {
7647 struct mddev *mddev = disk->private_data;
7648
7649 return mddev->changed;
7650 }
7651
7652 static int md_revalidate(struct gendisk *disk)
7653 {
7654 struct mddev *mddev = disk->private_data;
7655
7656 mddev->changed = 0;
7657 return 0;
7658 }
7659 static const struct block_device_operations md_fops =
7660 {
7661 .owner = THIS_MODULE,
7662 .open = md_open,
7663 .release = md_release,
7664 .ioctl = md_ioctl,
7665 #ifdef CONFIG_COMPAT
7666 .compat_ioctl = md_compat_ioctl,
7667 #endif
7668 .getgeo = md_getgeo,
7669 .media_changed = md_media_changed,
7670 .revalidate_disk= md_revalidate,
7671 };
7672
7673 static int md_thread(void *arg)
7674 {
7675 struct md_thread *thread = arg;
7676
7677 /*
7678 * md_thread is the per-array daemon: it sleeps until THREAD_WAKEUP
7679 * is set (or the timeout expires) and then calls the run() routine
7680 * registered by the personality. SIGKILL is allowed so that a signal
7681 * can push the array into immediate safe mode.
7682 */
7683
7684
7685
7686
7687
7688
7689 allow_signal(SIGKILL);
7690 while (!kthread_should_stop()) {
7691
7692 /* We need to wait INTERRUPTIBLE so that
7693 * we don't add to the load-average.
7694 * That means we need to be sure no signals are
7695 * pending
7696 */
7697 if (signal_pending(current))
7698 flush_signals(current);
7699
7700 wait_event_interruptible_timeout
7701 (thread->wqueue,
7702 test_bit(THREAD_WAKEUP, &thread->flags)
7703 || kthread_should_stop() || kthread_should_park(),
7704 thread->timeout);
7705
7706 clear_bit(THREAD_WAKEUP, &thread->flags);
7707 if (kthread_should_park())
7708 kthread_parkme();
7709 if (!kthread_should_stop())
7710 thread->run(thread);
7711 }
7712
7713 return 0;
7714 }
7715
7716 void md_wakeup_thread(struct md_thread *thread)
7717 {
7718 if (thread) {
7719 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7720 set_bit(THREAD_WAKEUP, &thread->flags);
7721 wake_up(&thread->wqueue);
7722 }
7723 }
7724 EXPORT_SYMBOL(md_wakeup_thread);
7725
7726 struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7727 struct mddev *mddev, const char *name)
7728 {
7729 struct md_thread *thread;
7730
7731 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7732 if (!thread)
7733 return NULL;
7734
7735 init_waitqueue_head(&thread->wqueue);
7736
7737 thread->run = run;
7738 thread->mddev = mddev;
7739 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7740 thread->tsk = kthread_run(md_thread, thread,
7741 "%s_%s",
7742 mdname(thread->mddev),
7743 name);
7744 if (IS_ERR(thread->tsk)) {
7745 kfree(thread);
7746 return NULL;
7747 }
7748 return thread;
7749 }
7750 EXPORT_SYMBOL(md_register_thread);
7751
7752 void md_unregister_thread(struct md_thread **threadp)
7753 {
7754 struct md_thread *thread = *threadp;
7755 if (!thread)
7756 return;
7757 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
7758 /* Locking ensures that mddev_unlock does not wake_up a
7759 * non-existent thread
7760 */
7761 spin_lock(&pers_lock);
7762 *threadp = NULL;
7763 spin_unlock(&pers_lock);
7764
7765 kthread_stop(thread->tsk);
7766 kfree(thread);
7767 }
7768 EXPORT_SYMBOL(md_unregister_thread);
7769
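/* md_error() is called by personalities when a member device fails.
 * It hands the device to the personality's error handler, interrupts
 * any resync and asks the array thread to update the superblocks. */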
7770 void md_error(struct mddev *mddev, struct md_rdev *rdev)
7771 {
7772 if (!rdev || test_bit(Faulty, &rdev->flags))
7773 return;
7774
7775 if (!mddev->pers || !mddev->pers->error_handler)
7776 return;
7777 mddev->pers->error_handler(mddev,rdev);
7778 if (mddev->degraded)
7779 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7780 sysfs_notify_dirent_safe(rdev->sysfs_state);
7781 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7782 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7783 md_wakeup_thread(mddev->thread);
7784 if (mddev->event_work.func)
7785 queue_work(md_misc_wq, &mddev->event_work);
7786 md_new_event(mddev);
7787 }
7788 EXPORT_SYMBOL(md_error);
7789
7790
7791 /* seq_file implementation for /proc/mdstat */
7792 static void status_unused(struct seq_file *seq)
7793 {
7794 int i = 0;
7795 struct md_rdev *rdev;
7796
7797 seq_printf(seq, "unused devices: ");
7798
7799 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7800 char b[BDEVNAME_SIZE];
7801 i++;
7802 seq_printf(seq, "%s ",
7803 bdevname(rdev->bdev,b));
7804 }
7805 if (!i)
7806 seq_printf(seq, "<none>");
7807
7808 seq_printf(seq, "\n");
7809 }
7810
7811 static int status_resync(struct seq_file *seq, struct mddev *mddev)
7812 {
7813 sector_t max_sectors, resync, res;
7814 unsigned long dt, db = 0;
7815 sector_t rt, curr_mark_cnt, resync_mark_cnt;
7816 int scale, recovery_active;
7817 unsigned int per_milli;
7818
7819 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7820 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7821 max_sectors = mddev->resync_max_sectors;
7822 else
7823 max_sectors = mddev->dev_sectors;
7824
7825 resync = mddev->curr_resync;
7826 if (resync <= 3) {
7827 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7828
7829 resync = max_sectors;
7830 } else if (resync > max_sectors)
7831 resync = max_sectors;
7832 else
7833 resync -= atomic_read(&mddev->recovery_active);
7834
7835 if (resync == 0) {
7836 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
7837 struct md_rdev *rdev;
7838
7839 rdev_for_each(rdev, mddev)
7840 if (rdev->raid_disk >= 0 &&
7841 !test_bit(Faulty, &rdev->flags) &&
7842 rdev->recovery_offset != MaxSector &&
7843 rdev->recovery_offset) {
7844 seq_printf(seq, "\trecover=REMOTE");
7845 return 1;
7846 }
7847 if (mddev->reshape_position != MaxSector)
7848 seq_printf(seq, "\treshape=REMOTE");
7849 else
7850 seq_printf(seq, "\tresync=REMOTE");
7851 return 1;
7852 }
7853 if (mddev->recovery_cp < MaxSector) {
7854 seq_printf(seq, "\tresync=PENDING");
7855 return 1;
7856 }
7857 return 0;
7858 }
7859 if (resync < 3) {
7860 seq_printf(seq, "\tresync=DELAYED");
7861 return 1;
7862 }
7863
7864 WARN_ON(max_sectors == 0);
7865
7866 /* Pick 'scale' so that (max_sectors>>scale) fits in the u32 that
7867 * sector_div() requires as a divisor, while (resync>>scale)*1000
7868 * still gives a useful per-mille value.
7869 */
7870 scale = 10;
7871 if (sizeof(sector_t) > sizeof(unsigned long)) {
7872 while ( max_sectors/2 > (1ULL<<(scale+32)))
7873 scale++;
7874 }
7875 res = (resync>>scale)*1000;
7876 sector_div(res, (u32)((max_sectors>>scale)+1));
7877
7878 per_milli = res;
7879 {
7880 int i, x = per_milli/50, y = 20-x;
7881 seq_printf(seq, "[");
7882 for (i = 0; i < x; i++)
7883 seq_printf(seq, "=");
7884 seq_printf(seq, ">");
7885 for (i = 0; i < y; i++)
7886 seq_printf(seq, ".");
7887 seq_printf(seq, "] ");
7888 }
7889 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
7890 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
7891 "reshape" :
7892 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
7893 "check" :
7894 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
7895 "resync" : "recovery"))),
7896 per_milli/10, per_milli % 10,
7897 (unsigned long long) resync/2,
7898 (unsigned long long) max_sectors/2);
7899
7900 /*
7901 * dt: seconds since the last rate mark
7902 * db: sectors completed since that mark
7903 * rt: estimated time remaining
7904 *
7905 * db is scaled by 32 before the division to preserve precision
7906 * near the end of the resync, and rt is shifted back down by 5
7907 * afterwards; the '+1' avoids a division by zero when db is tiny.
7908 */
7909
7910
7911
7912
7913
7914
7915
7916
7917 dt = ((jiffies - mddev->resync_mark) / HZ);
7918 if (!dt) dt++;
7919
7920 curr_mark_cnt = mddev->curr_mark_cnt;
7921 recovery_active = atomic_read(&mddev->recovery_active);
7922 resync_mark_cnt = mddev->resync_mark_cnt;
7923
7924 if (curr_mark_cnt >= (recovery_active + resync_mark_cnt))
7925 db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
7926
7927 rt = max_sectors - resync;
7928 rt = div64_u64(rt, db/32+1);
7929 rt *= dt;
7930 rt >>= 5;
7931
7932 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
7933 ((unsigned long)rt % 60)/6);
7934
7935 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
7936 return 1;
7937 }
7938
7939 static void *md_seq_start(struct seq_file *seq, loff_t *pos)
7940 {
7941 struct list_head *tmp;
7942 loff_t l = *pos;
7943 struct mddev *mddev;
7944
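/* (void*)1 and (void*)2 are sentinels for the "Personalities" header
 * and the trailing "unused devices" line respectively */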
7945 if (l >= 0x10000)
7946 return NULL;
7947 if (!l--)
7948
7949 return (void*)1;
7950
7951 spin_lock(&all_mddevs_lock);
7952 list_for_each(tmp,&all_mddevs)
7953 if (!l--) {
7954 mddev = list_entry(tmp, struct mddev, all_mddevs);
7955 mddev_get(mddev);
7956 spin_unlock(&all_mddevs_lock);
7957 return mddev;
7958 }
7959 spin_unlock(&all_mddevs_lock);
7960 if (!l--)
7961 return (void*)2;
7962 return NULL;
7963 }
7964
7965 static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7966 {
7967 struct list_head *tmp;
7968 struct mddev *next_mddev, *mddev = v;
7969
7970 ++*pos;
7971 if (v == (void*)2)
7972 return NULL;
7973
7974 spin_lock(&all_mddevs_lock);
7975 if (v == (void*)1)
7976 tmp = all_mddevs.next;
7977 else
7978 tmp = mddev->all_mddevs.next;
7979 if (tmp != &all_mddevs)
7980 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7981 else {
7982 next_mddev = (void*)2;
7983 *pos = 0x10000;
7984 }
7985 spin_unlock(&all_mddevs_lock);
7986
7987 if (v != (void*)1)
7988 mddev_put(mddev);
7989 return next_mddev;
7990
7991 }
7992
7993 static void md_seq_stop(struct seq_file *seq, void *v)
7994 {
7995 struct mddev *mddev = v;
7996
7997 if (mddev && v != (void*)1 && v != (void*)2)
7998 mddev_put(mddev);
7999 }
8000
8001 static int md_seq_show(struct seq_file *seq, void *v)
8002 {
8003 struct mddev *mddev = v;
8004 sector_t sectors;
8005 struct md_rdev *rdev;
8006
8007 if (v == (void*)1) {
8008 struct md_personality *pers;
8009 seq_printf(seq, "Personalities : ");
8010 spin_lock(&pers_lock);
8011 list_for_each_entry(pers, &pers_list, list)
8012 seq_printf(seq, "[%s] ", pers->name);
8013
8014 spin_unlock(&pers_lock);
8015 seq_printf(seq, "\n");
8016 seq->poll_event = atomic_read(&md_event_count);
8017 return 0;
8018 }
8019 if (v == (void*)2) {
8020 status_unused(seq);
8021 return 0;
8022 }
8023
8024 spin_lock(&mddev->lock);
8025 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
8026 seq_printf(seq, "%s : %sactive", mdname(mddev),
8027 mddev->pers ? "" : "in");
8028 if (mddev->pers) {
8029 if (mddev->ro==1)
8030 seq_printf(seq, " (read-only)");
8031 if (mddev->ro==2)
8032 seq_printf(seq, " (auto-read-only)");
8033 seq_printf(seq, " %s", mddev->pers->name);
8034 }
8035
8036 sectors = 0;
8037 rcu_read_lock();
8038 rdev_for_each_rcu(rdev, mddev) {
8039 char b[BDEVNAME_SIZE];
8040 seq_printf(seq, " %s[%d]",
8041 bdevname(rdev->bdev,b), rdev->desc_nr);
8042 if (test_bit(WriteMostly, &rdev->flags))
8043 seq_printf(seq, "(W)");
8044 if (test_bit(Journal, &rdev->flags))
8045 seq_printf(seq, "(J)");
8046 if (test_bit(Faulty, &rdev->flags)) {
8047 seq_printf(seq, "(F)");
8048 continue;
8049 }
8050 if (rdev->raid_disk < 0)
8051 seq_printf(seq, "(S)");
8052 if (test_bit(Replacement, &rdev->flags))
8053 seq_printf(seq, "(R)");
8054 sectors += rdev->sectors;
8055 }
8056 rcu_read_unlock();
8057
8058 if (!list_empty(&mddev->disks)) {
8059 if (mddev->pers)
8060 seq_printf(seq, "\n %llu blocks",
8061 (unsigned long long)
8062 mddev->array_sectors / 2);
8063 else
8064 seq_printf(seq, "\n %llu blocks",
8065 (unsigned long long)sectors / 2);
8066 }
8067 if (mddev->persistent) {
8068 if (mddev->major_version != 0 ||
8069 mddev->minor_version != 90) {
8070 seq_printf(seq," super %d.%d",
8071 mddev->major_version,
8072 mddev->minor_version);
8073 }
8074 } else if (mddev->external)
8075 seq_printf(seq, " super external:%s",
8076 mddev->metadata_type);
8077 else
8078 seq_printf(seq, " super non-persistent");
8079
8080 if (mddev->pers) {
8081 mddev->pers->status(seq, mddev);
8082 seq_printf(seq, "\n ");
8083 if (mddev->pers->sync_request) {
8084 if (status_resync(seq, mddev))
8085 seq_printf(seq, "\n ");
8086 }
8087 } else
8088 seq_printf(seq, "\n ");
8089
8090 md_bitmap_status(seq, mddev->bitmap);
8091
8092 seq_printf(seq, "\n");
8093 }
8094 spin_unlock(&mddev->lock);
8095
8096 return 0;
8097 }
8098
8099 static const struct seq_operations md_seq_ops = {
8100 .start = md_seq_start,
8101 .next = md_seq_next,
8102 .stop = md_seq_stop,
8103 .show = md_seq_show,
8104 };
8105
8106 static int md_seq_open(struct inode *inode, struct file *file)
8107 {
8108 struct seq_file *seq;
8109 int error;
8110
8111 error = seq_open(file, &md_seq_ops);
8112 if (error)
8113 return error;
8114
8115 seq = file->private_data;
8116 seq->poll_event = atomic_read(&md_event_count);
8117 return error;
8118 }
8119
8120 static int md_unloading;
8121 static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
8122 {
8123 struct seq_file *seq = filp->private_data;
8124 __poll_t mask;
8125
8126 if (md_unloading)
8127 return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
8128 poll_wait(filp, &md_event_waiters, wait);
8129
8130
8131 mask = EPOLLIN | EPOLLRDNORM;
8132
8133 if (seq->poll_event != atomic_read(&md_event_count))
8134 mask |= EPOLLERR | EPOLLPRI;
8135 return mask;
8136 }
8137
8138 static const struct file_operations md_seq_fops = {
8139 .owner = THIS_MODULE,
8140 .open = md_seq_open,
8141 .read = seq_read,
8142 .llseek = seq_lseek,
8143 .release = seq_release,
8144 .poll = mdstat_poll,
8145 };
8146
8147 int register_md_personality(struct md_personality *p)
8148 {
8149 pr_debug("md: %s personality registered for level %d\n",
8150 p->name, p->level);
8151 spin_lock(&pers_lock);
8152 list_add_tail(&p->list, &pers_list);
8153 spin_unlock(&pers_lock);
8154 return 0;
8155 }
8156 EXPORT_SYMBOL(register_md_personality);
8157
8158 int unregister_md_personality(struct md_personality *p)
8159 {
8160 pr_debug("md: %s personality unregistered\n", p->name);
8161 spin_lock(&pers_lock);
8162 list_del_init(&p->list);
8163 spin_unlock(&pers_lock);
8164 return 0;
8165 }
8166 EXPORT_SYMBOL(unregister_md_personality);
8167
8168 int register_md_cluster_operations(struct md_cluster_operations *ops,
8169 struct module *module)
8170 {
8171 int ret = 0;
8172 spin_lock(&pers_lock);
8173 if (md_cluster_ops != NULL)
8174 ret = -EALREADY;
8175 else {
8176 md_cluster_ops = ops;
8177 md_cluster_mod = module;
8178 }
8179 spin_unlock(&pers_lock);
8180 return ret;
8181 }
8182 EXPORT_SYMBOL(register_md_cluster_operations);
8183
8184 int unregister_md_cluster_operations(void)
8185 {
8186 spin_lock(&pers_lock);
8187 md_cluster_ops = NULL;
8188 spin_unlock(&pers_lock);
8189 return 0;
8190 }
8191 EXPORT_SYMBOL(unregister_md_cluster_operations);
8192
8193 int md_setup_cluster(struct mddev *mddev, int nodes)
8194 {
8195 if (!md_cluster_ops)
8196 request_module("md-cluster");
8197 spin_lock(&pers_lock);
8198 /* ensure the md-cluster module is not unloaded while we use it */
8199 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
8200 pr_warn("can't find md-cluster module or get its reference.\n");
8201 spin_unlock(&pers_lock);
8202 return -ENOENT;
8203 }
8204 spin_unlock(&pers_lock);
8205
8206 return md_cluster_ops->join(mddev, nodes);
8207 }
8208
8209 void md_cluster_stop(struct mddev *mddev)
8210 {
8211 if (!md_cluster_ops)
8212 return;
8213 md_cluster_ops->leave(mddev);
8214 module_put(md_cluster_mod);
8215 }
8216
8217 static int is_mddev_idle(struct mddev *mddev, int init)
8218 {
8219 struct md_rdev *rdev;
8220 int idle;
8221 int curr_events;
8222
8223 idle = 1;
8224 rcu_read_lock();
8225 rdev_for_each_rcu(rdev, mddev) {
8226 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
8227 curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
8228 atomic_read(&disk->sync_io);
8229 /* sync_io is counted when a sync request is issued, while the
8230 * disk_stats sectors count when it completes, so resync activity
8231 * keeps curr_events small. Normal IO bumps disk_stats without
8232 * touching sync_io, so curr_events grows; once it is well ahead
8233 * of last_events the array is treated as busy and resync slows
8234 * down. A burst of in-flight resync IO at the moment last_events
8235 * is sampled can make the array look busy once, but last_events
8236 * soon catches up and the estimate settles.
8237 */
8238
8239
8240
8241
8242
8243
8244
8245
8246
8247
8248
8249
8250
8251 if (init || curr_events - rdev->last_events > 64) {
8252 rdev->last_events = curr_events;
8253 idle = 0;
8254 }
8255 }
8256 rcu_read_unlock();
8257 return idle;
8258 }
8259
8260 void md_done_sync(struct mddev *mddev, int blocks, int ok)
8261 {
8262
8263 atomic_sub(blocks, &mddev->recovery_active);
8264 wake_up(&mddev->recovery_wait);
8265 if (!ok) {
8266 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8267 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8268 md_wakeup_thread(mddev->thread);
8269
8270 }
8271 }
8272 EXPORT_SYMBOL(md_done_sync);
8273
8274 /* md_write_start(mddev, bi)
8275 * If we need to update some array metadata (e.g. the 'active' flag
8276 * in the superblock) before writing, schedule a superblock update
8277 * and wait for it to complete.
8278 * A return value of 'false' means the write wasn't recorded
8279 * and cannot proceed because the array is being suspended.
8280 */
8281 bool md_write_start(struct mddev *mddev, struct bio *bi)
8282 {
8283 int did_change = 0;
8284
8285 if (bio_data_dir(bi) != WRITE)
8286 return true;
8287
8288 BUG_ON(mddev->ro == 1);
8289 if (mddev->ro == 2) {
8290 /* need to switch to read/write */
8291 mddev->ro = 0;
8292 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8293 md_wakeup_thread(mddev->thread);
8294 md_wakeup_thread(mddev->sync_thread);
8295 did_change = 1;
8296 }
8297 rcu_read_lock();
8298 percpu_ref_get(&mddev->writes_pending);
8299 smp_mb();
8300 if (mddev->safemode == 1)
8301 mddev->safemode = 0;
8302 /* sync_checkers is always 0 when writes_pending is in per-cpu mode */
8303 if (mddev->in_sync || mddev->sync_checkers) {
8304 spin_lock(&mddev->lock);
8305 if (mddev->in_sync) {
8306 mddev->in_sync = 0;
8307 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8308 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8309 md_wakeup_thread(mddev->thread);
8310 did_change = 1;
8311 }
8312 spin_unlock(&mddev->lock);
8313 }
8314 rcu_read_unlock();
8315 if (did_change)
8316 sysfs_notify_dirent_safe(mddev->sysfs_state);
8317 if (!mddev->has_superblocks)
8318 return true;
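/* wait for any scheduled superblock update to be written out (or for
 * the array to be suspended) before allowing the write to proceed */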
8319 wait_event(mddev->sb_wait,
8320 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8321 mddev->suspended);
8322 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8323 percpu_ref_put(&mddev->writes_pending);
8324 return false;
8325 }
8326 return true;
8327 }
8328 EXPORT_SYMBOL(md_write_start);
8329
8330 /* md_write_inc can only be called when md_write_start() has
8331 * already been called at least once on the current request.
8332 * It increments the counter and is useful when a single request
8333 * is split into several parts. Each part causes an increment and
8334 * so needs a matching md_write_end().
8335 * Unlike md_write_start(), it is safe to call md_write_inc() inside
8336 * a spinlocked region.
8337 */
8338 void md_write_inc(struct mddev *mddev, struct bio *bi)
8339 {
8340 if (bio_data_dir(bi) != WRITE)
8341 return;
8342 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8343 percpu_ref_get(&mddev->writes_pending);
8344 }
8345 EXPORT_SYMBOL(md_write_inc);
8346
8347 void md_write_end(struct mddev *mddev)
8348 {
8349 percpu_ref_put(&mddev->writes_pending);
8350
8351 if (mddev->safemode == 2)
8352 md_wakeup_thread(mddev->thread);
8353 else if (mddev->safemode_delay)
8354 /* The roundup() ensures this only performs locking once
8355 * every ->safemode_delay jiffies
8356 */
8357 mod_timer(&mddev->safemode_timer,
8358 roundup(jiffies, mddev->safemode_delay) +
8359 mddev->safemode_delay);
8360 }
8361
8362 EXPORT_SYMBOL(md_write_end);
8363
8364 /* md_allow_write(mddev)
8365 * Calling this ensures that the array is marked 'active' so that
8366 * writes may proceed without blocking. It is important to call this
8367 * before attempting a GFP_KERNEL allocation while holding the mddev
8368 * lock, as such allocations can block while the superblock is written.
8369 */
8370 void md_allow_write(struct mddev *mddev)
8371 {
8372 if (!mddev->pers)
8373 return;
8374 if (mddev->ro)
8375 return;
8376 if (!mddev->pers->sync_request)
8377 return;
8378
8379 spin_lock(&mddev->lock);
8380 if (mddev->in_sync) {
8381 mddev->in_sync = 0;
8382 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8383 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8384 if (mddev->safemode_delay &&
8385 mddev->safemode == 0)
8386 mddev->safemode = 1;
8387 spin_unlock(&mddev->lock);
8388 md_update_sb(mddev, 0);
8389 sysfs_notify_dirent_safe(mddev->sysfs_state);
8390 /* wait for the dirty state to be recorded in the superblocks */
8391 wait_event(mddev->sb_wait,
8392 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8393 } else
8394 spin_unlock(&mddev->lock);
8395 }
8396 EXPORT_SYMBOL_GPL(md_allow_write);
8397
8398 #define SYNC_MARKS 10
8399 #define SYNC_MARK_STEP (3*HZ)
8400 #define UPDATE_FREQUENCY (5*60*HZ)
8401 void md_do_sync(struct md_thread *thread)
8402 {
8403 struct mddev *mddev = thread->mddev;
8404 struct mddev *mddev2;
8405 unsigned int currspeed = 0, window;
8406 sector_t max_sectors,j, io_sectors, recovery_done;
8407 unsigned long mark[SYNC_MARKS];
8408 unsigned long update_time;
8409 sector_t mark_cnt[SYNC_MARKS];
8410 int last_mark,m;
8411 struct list_head *tmp;
8412 sector_t last_check;
8413 int skipped = 0;
8414 struct md_rdev *rdev;
8415 char *desc, *action = NULL;
8416 struct blk_plug plug;
8417 int ret;
8418
8419 /* just in case the thread restarts... */
8420 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8421 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8422 return;
8423 if (mddev->ro) {
8424 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8425 return;
8426 }
8427
8428 if (mddev_is_clustered(mddev)) {
8429 ret = md_cluster_ops->resync_start(mddev);
8430 if (ret)
8431 goto skip;
8432
8433 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8434 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8435 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8436 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8437 && ((unsigned long long)mddev->curr_resync_completed
8438 < (unsigned long long)mddev->resync_max_sectors))
8439 goto skip;
8440 }
8441
8442 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8443 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8444 desc = "data-check";
8445 action = "check";
8446 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8447 desc = "requested-resync";
8448 action = "repair";
8449 } else
8450 desc = "resync";
8451 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8452 desc = "reshape";
8453 else
8454 desc = "recovery";
8455
8456 mddev->last_sync_action = action ?: desc;
8457
8458 /* We overload curr_resync somewhat here:
8459 * 0 == not engaged in resync at all
8460 * 2 == checking that there is no conflict with another sync
8461 * 1 == like 2, but we have yielded to allow a conflicting resync
8462 * to commence
8463 * other == actively resyncing, at this many sectors
8464 *
8465 * Before starting a resync we set curr_resync to 2 and then check
8466 * that every "conflicting" array has curr_resync less than ours.
8467 * When we find one that is the same or higher we wait on
8468 * resync_wait. To avoid deadlock we reduce curr_resync to 1 if we
8469 * choose to yield (decided arbitrarily by mddev address), which
8470 * means we have to start checking from the beginning again.
8471 */
8472
8473
8474 do {
8475 int mddev2_minor = -1;
8476 mddev->curr_resync = 2;
8477
8478 try_again:
8479 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8480 goto skip;
8481 for_each_mddev(mddev2, tmp) {
8482 if (mddev2 == mddev)
8483 continue;
8484 if (!mddev->parallel_resync
8485 && mddev2->curr_resync
8486 && match_mddev_units(mddev, mddev2)) {
8487 DEFINE_WAIT(wq);
8488 if (mddev < mddev2 && mddev->curr_resync == 2) {
8489 /* arbitrarily yield */
8490 mddev->curr_resync = 1;
8491 wake_up(&resync_wait);
8492 }
8493 if (mddev > mddev2 && mddev->curr_resync == 1)
8494 /* no need to wait here, we can wait the next
8495 * time 'round when curr_resync == 2
8496 */
8497 continue;
8498 /* We need to wait 'interruptible' so as not to
8499 * contribute to the load average, and not to
8500 * be caught by 'softlockup'
8501 */
8502 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
8503 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8504 mddev2->curr_resync >= mddev->curr_resync) {
8505 if (mddev2_minor != mddev2->md_minor) {
8506 mddev2_minor = mddev2->md_minor;
8507 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
8508 desc, mdname(mddev),
8509 mdname(mddev2));
8510 }
8511 mddev_put(mddev2);
8512 if (signal_pending(current))
8513 flush_signals(current);
8514 schedule();
8515 finish_wait(&resync_wait, &wq);
8516 goto try_again;
8517 }
8518 finish_wait(&resync_wait, &wq);
8519 }
8520 }
8521 } while (mddev->curr_resync < 2);
8522
8523 j = 0;
8524 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8525 /* resync follows the size requested by the personality,
8526 * which defaults to physical size, but can be virtual size
8527 */
8528 max_sectors = mddev->resync_max_sectors;
8529 atomic64_set(&mddev->resync_mismatches, 0);
8530
8531 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8532 j = mddev->resync_min;
8533 else if (!mddev->bitmap)
8534 j = mddev->recovery_cp;
8535
8536 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8537 max_sectors = mddev->resync_max_sectors;
8538 /*
8539 * If the original node aborted the reshape, we continue it here,
8540 * so set j to the recorded reshape position instead of restarting
8541 * from the beginning.
8542 */
8543 if (mddev_is_clustered(mddev) &&
8544 mddev->reshape_position != MaxSector)
8545 j = mddev->reshape_position;
8546 } else {
8547 /* recovery follows the physical size of the devices */
8548 max_sectors = mddev->dev_sectors;
8549 j = MaxSector;
8550 rcu_read_lock();
8551 rdev_for_each_rcu(rdev, mddev)
8552 if (rdev->raid_disk >= 0 &&
8553 !test_bit(Journal, &rdev->flags) &&
8554 !test_bit(Faulty, &rdev->flags) &&
8555 !test_bit(In_sync, &rdev->flags) &&
8556 rdev->recovery_offset < j)
8557 j = rdev->recovery_offset;
8558 rcu_read_unlock();
8559
8560 /* If there is a bitmap, we need to make sure all
8561 * writes that started before we added a spare
8562 * complete before we start doing a recovery.
8563 * Otherwise the write might complete and (via
8564 * bitmap_endwrite) set a bit in the bitmap after the
8565 * recovery has checked that bit and skipped that
8566 * region.
8567 */
8568 if (mddev->bitmap) {
8569 mddev->pers->quiesce(mddev, 1);
8570 mddev->pers->quiesce(mddev, 0);
8571 }
8572 }
8573
8574 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8575 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8576 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8577 speed_max(mddev), desc);
8578
8579 is_mddev_idle(mddev, 1);
8580
8581 io_sectors = 0;
8582 for (m = 0; m < SYNC_MARKS; m++) {
8583 mark[m] = jiffies;
8584 mark_cnt[m] = io_sectors;
8585 }
8586 last_mark = 0;
8587 mddev->resync_mark = mark[last_mark];
8588 mddev->resync_mark_cnt = mark_cnt[last_mark];
8589
8590 /*
8591 * Tune reconstruction:
8592 */
8593 window = 32 * (PAGE_SIZE / 512);
8594 pr_debug("md: using %dk window, over a total of %lluk.\n",
8595 window/2, (unsigned long long)max_sectors/2);
8596
8597 atomic_set(&mddev->recovery_active, 0);
8598 last_check = 0;
8599
8600 if (j>2) {
8601 pr_debug("md: resuming %s of %s from checkpoint.\n",
8602 desc, mdname(mddev));
8603 mddev->curr_resync = j;
8604 } else
8605 mddev->curr_resync = 3;
8606 mddev->curr_resync_completed = j;
8607 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8608 md_new_event(mddev);
8609 update_time = jiffies;
8610
8611 blk_start_plug(&plug);
8612 while (j < max_sectors) {
8613 sector_t sectors;
8614
8615 skipped = 0;
8616
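/* Update curr_resync_completed (and hence the checkpoint recorded in
 * the superblock) when we have moved far enough past the last
 * checkpoint, when UPDATE_FREQUENCY has elapsed, or when we are
 * approaching resync_max. */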
8617 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8618 ((mddev->curr_resync > mddev->curr_resync_completed &&
8619 (mddev->curr_resync - mddev->curr_resync_completed)
8620 > (max_sectors >> 4)) ||
8621 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8622 (j - mddev->curr_resync_completed)*2
8623 >= mddev->resync_max - mddev->curr_resync_completed ||
8624 mddev->curr_resync_completed > mddev->resync_max
8625 )) {
8626
8627 wait_event(mddev->recovery_wait,
8628 atomic_read(&mddev->recovery_active) == 0);
8629 mddev->curr_resync_completed = j;
8630 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8631 j > mddev->recovery_cp)
8632 mddev->recovery_cp = j;
8633 update_time = jiffies;
8634 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8635 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8636 }
8637
8638 while (j >= mddev->resync_max &&
8639 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8640 /* As this condition is controlled by user-space,
8641 * we can block indefinitely, so use '_interruptible'
8642 * to avoid triggering warnings.
8643 */
8644 flush_signals(current);
8645 wait_event_interruptible(mddev->recovery_wait,
8646 mddev->resync_max > j
8647 || test_bit(MD_RECOVERY_INTR,
8648 &mddev->recovery));
8649 }
8650
8651 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8652 break;
8653
8654 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8655 if (sectors == 0) {
8656 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8657 break;
8658 }
8659
8660 if (!skipped) {
8661 io_sectors += sectors;
8662 atomic_add(sectors, &mddev->recovery_active);
8663 }
8664
8665 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8666 break;
8667
8668 j += sectors;
8669 if (j > max_sectors)
8670 /* when skipping, extra large numbers can be returned */
8671 j = max_sectors;
8672 if (j > 2)
8673 mddev->curr_resync = j;
8674 mddev->curr_mark_cnt = io_sectors;
8675 if (last_check == 0)
8676 /* this is the earliest that rebuild will be
8677 * visible in /proc/mdstat
8678 */
8679 md_new_event(mddev);
8680
8681 if (last_check + window > io_sectors || j == max_sectors)
8682 continue;
8683
8684 last_check = io_sectors;
8685 repeat:
8686 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
8687
8688 int next = (last_mark+1) % SYNC_MARKS;
8689
8690 mddev->resync_mark = mark[next];
8691 mddev->resync_mark_cnt = mark_cnt[next];
8692 mark[next] = jiffies;
8693 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8694 last_mark = next;
8695 }
8696
8697 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8698 break;
8699
8700 /*
8701 * this loop exits only if we are slower than
8702 * the 'hard' speed limit, or the system was IO-idle for
8703 * a jiffy.
8704 * The system might be non-idle CPU-wise, but we only care
8705 * about not overloading the IO subsystem (things like an
8706 * e2fsck being done on the RAID array should execute fast).
8707 */
8708 cond_resched();
8709
8710 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8711 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8712 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8713
8714 if (currspeed > speed_min(mddev)) {
8715 if (currspeed > speed_max(mddev)) {
8716 msleep(500);
8717 goto repeat;
8718 }
8719 if (!is_mddev_idle(mddev, 0)) {
8720 /*
8721 * Give other IO more of a chance.
8722 * The faster the devices, the less we wait.
8723 */
8724 wait_event(mddev->recovery_wait,
8725 !atomic_read(&mddev->recovery_active));
8726 }
8727 }
8728 }
8729 pr_info("md: %s: %s %s.\n",mdname(mddev), desc,
8730 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8731 ? "interrupted" : "done");
8732 /*
8733 * this also signals 'finished resyncing' to md_stop
8734 */
8735 blk_finish_plug(&plug);
8736 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8737
8738 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8739 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8740 mddev->curr_resync > 3) {
8741 mddev->curr_resync_completed = mddev->curr_resync;
8742 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8743 }
8744 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8745
8746 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8747 mddev->curr_resync > 3) {
8748 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8749 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8750 if (mddev->curr_resync >= mddev->recovery_cp) {
8751 pr_debug("md: checkpointing %s of %s.\n",
8752 desc, mdname(mddev));
8753 if (test_bit(MD_RECOVERY_ERROR,
8754 &mddev->recovery))
8755 mddev->recovery_cp =
8756 mddev->curr_resync_completed;
8757 else
8758 mddev->recovery_cp =
8759 mddev->curr_resync;
8760 }
8761 } else
8762 mddev->recovery_cp = MaxSector;
8763 } else {
8764 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8765 mddev->curr_resync = MaxSector;
8766 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8767 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
8768 rcu_read_lock();
8769 rdev_for_each_rcu(rdev, mddev)
8770 if (rdev->raid_disk >= 0 &&
8771 mddev->delta_disks >= 0 &&
8772 !test_bit(Journal, &rdev->flags) &&
8773 !test_bit(Faulty, &rdev->flags) &&
8774 !test_bit(In_sync, &rdev->flags) &&
8775 rdev->recovery_offset < mddev->curr_resync)
8776 rdev->recovery_offset = mddev->curr_resync;
8777 rcu_read_unlock();
8778 }
8779 }
8780 }
8781 skip:
8782 /* set CHANGE_PENDING here since another update may be needed,
8783 * so other nodes are informed. It should be harmless for a
8784 * normal (non-clustered) array. */
8785 set_mask_bits(&mddev->sb_flags, 0,
8786 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
8787
8788 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8789 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8790 mddev->delta_disks > 0 &&
8791 mddev->pers->finish_reshape &&
8792 mddev->pers->size &&
8793 mddev->queue) {
8794 mddev_lock_nointr(mddev);
8795 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
8796 mddev_unlock(mddev);
8797 if (!mddev_is_clustered(mddev)) {
8798 set_capacity(mddev->gendisk, mddev->array_sectors);
8799 revalidate_disk(mddev->gendisk);
8800 }
8801 }
8802
8803 spin_lock(&mddev->lock);
8804 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8805 /* We completed, so any min/max setting can be forgotten if used. */
8806 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8807 mddev->resync_min = 0;
8808 mddev->resync_max = MaxSector;
8809 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8810 mddev->resync_min = mddev->curr_resync_completed;
8811 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8812 mddev->curr_resync = 0;
8813 spin_unlock(&mddev->lock);
8814
8815 wake_up(&resync_wait);
8816 md_wakeup_thread(mddev->thread);
8817 return;
8818 }
8819 EXPORT_SYMBOL_GPL(md_do_sync);
8820
8821 static int remove_and_add_spares(struct mddev *mddev,
8822 struct md_rdev *this)
8823 {
8824 struct md_rdev *rdev;
8825 int spares = 0;
8826 int removed = 0;
8827 bool remove_some = false;
8828
8829 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
8830 /* Mustn't remove devices when the resync thread is running */
8831 return 0;
8832
8833 rdev_for_each(rdev, mddev) {
8834 if ((this == NULL || rdev == this) &&
8835 rdev->raid_disk >= 0 &&
8836 !test_bit(Blocked, &rdev->flags) &&
8837 test_bit(Faulty, &rdev->flags) &&
8838 atomic_read(&rdev->nr_pending)==0) {
8839 /* Faulty non-Blocked devices with nr_pending == 0
8840 * never get nr_pending incremented,
8841 * never get Faulty cleared, and never get Blocked set.
8842 * So we can synchronize_rcu now rather than once per device.
8843 */
8844 remove_some = true;
8845 set_bit(RemoveSynchronized, &rdev->flags);
8846 }
8847 }
8848
8849 if (remove_some)
8850 synchronize_rcu();
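/* second pass: actually detach devices that are failed or out of
 * sync (and not the journal), provided nothing still references them */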
8851 rdev_for_each(rdev, mddev) {
8852 if ((this == NULL || rdev == this) &&
8853 rdev->raid_disk >= 0 &&
8854 !test_bit(Blocked, &rdev->flags) &&
8855 ((test_bit(RemoveSynchronized, &rdev->flags) ||
8856 (!test_bit(In_sync, &rdev->flags) &&
8857 !test_bit(Journal, &rdev->flags))) &&
8858 atomic_read(&rdev->nr_pending)==0)) {
8859 if (mddev->pers->hot_remove_disk(
8860 mddev, rdev) == 0) {
8861 sysfs_unlink_rdev(mddev, rdev);
8862 rdev->saved_raid_disk = rdev->raid_disk;
8863 rdev->raid_disk = -1;
8864 removed++;
8865 }
8866 }
8867 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
8868 clear_bit(RemoveSynchronized, &rdev->flags);
8869 }
8870
8871 if (removed && mddev->kobj.sd)
8872 sysfs_notify(&mddev->kobj, NULL, "degraded");
8873
8874 if (this && removed)
8875 goto no_add;
8876
8877 rdev_for_each(rdev, mddev) {
8878 if (this && this != rdev)
8879 continue;
8880 if (test_bit(Candidate, &rdev->flags))
8881 continue;
8882 if (rdev->raid_disk >= 0 &&
8883 !test_bit(In_sync, &rdev->flags) &&
8884 !test_bit(Journal, &rdev->flags) &&
8885 !test_bit(Faulty, &rdev->flags))
8886 spares++;
8887 if (rdev->raid_disk >= 0)
8888 continue;
8889 if (test_bit(Faulty, &rdev->flags))
8890 continue;
8891 if (!test_bit(Journal, &rdev->flags)) {
8892 if (mddev->ro &&
8893 ! (rdev->saved_raid_disk >= 0 &&
8894 !test_bit(Bitmap_sync, &rdev->flags)))
8895 continue;
8896
8897 rdev->recovery_offset = 0;
8898 }
8899 if (mddev->pers->
8900 hot_add_disk(mddev, rdev) == 0) {
8901 if (sysfs_link_rdev(mddev, rdev))
8902 /* failure here is OK */;
8903 if (!test_bit(Journal, &rdev->flags))
8904 spares++;
8905 md_new_event(mddev);
8906 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8907 }
8908 }
8909 no_add:
8910 if (removed)
8911 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8912 return spares;
8913 }
8914
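/*
 * md_start_sync() - deferred work (queued on md_misc_wq) that registers the
 * "resync" thread.  On failure it clears the recovery bits set by
 * md_check_recovery() and notifies sysfs instead.
 */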
8915 static void md_start_sync(struct work_struct *ws)
8916 {
8917 struct mddev *mddev = container_of(ws, struct mddev, del_work);
8918
8919 mddev->sync_thread = md_register_thread(md_do_sync,
8920 mddev,
8921 "resync");
8922 if (!mddev->sync_thread) {
8923 pr_warn("%s: could not start resync thread...\n",
8924 mdname(mddev));
8925 /* leave the spares where they are, it shouldn't hurt */
8926 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8927 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8928 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8929 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8930 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8931 wake_up(&resync_wait);
8932 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8933 &mddev->recovery))
8934 if (mddev->sysfs_action)
8935 sysfs_notify_dirent_safe(mddev->sysfs_action);
8936 } else
8937 md_wakeup_thread(mddev->sync_thread);
8938 sysfs_notify_dirent_safe(mddev->sysfs_action);
8939 md_new_event(mddev);
8940 }
8941
8942 /*
8943  * This routine is regularly called by all per-raid-array threads to
8944  * deal with generic issues like resync and super-block update.
8945  * Raid personalities that don't have a thread (linear/raid0) do not
8946  * need this as they never do any recovery or update the superblock.
8947  *
8948  * It does not do any resync itself, but rather "forks" off other threads
8949  * to do that as needed.
8950  * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
8951  * "->recovery" and create a thread at ->sync_thread.
8952  * When the thread finishes it sets MD_RECOVERY_DONE
8953  * and wakes up this thread, which will reap the thread and finish up.
8954  * This thread also removes any faulty devices (with nr_pending == 0).
8955  *
8956  * The overall approach is:
8957  *  1/ if the superblock needs updating, update it.
8958  *  2/ If a recovery thread is running, don't disturb it;
8959  *     if the recovery thread is finished, clean up, possibly find a new
8960  *     device to add.
8961  *  3/ Add a spare if not degraded, or remove a faulty device if degraded.
8962  *  4/ Check if a new device is being added; if so, add it to the array.
8963  */
8964 void md_check_recovery(struct mddev *mddev)
8965 {
8966 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
8967 /* Write superblock - thread that called mddev_suspend()
8968  * holds reconfig_mutex for us.
8969  */
8970 set_bit(MD_UPDATING_SB, &mddev->flags);
8971 smp_mb__after_atomic();
8972 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
8973 md_update_sb(mddev, 0);
8974 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
8975 wake_up(&mddev->sb_wait);
8976 }
8977
8978 if (mddev->suspended)
8979 return;
8980
8981 if (mddev->bitmap)
8982 md_bitmap_daemon_work(mddev);
8983
8984 if (signal_pending(current)) {
8985 if (mddev->pers->sync_request && !mddev->external) {
8986 pr_debug("md: %s in immediate safe mode\n",
8987 mdname(mddev));
8988 mddev->safemode = 2;
8989 }
8990 flush_signals(current);
8991 }
8992
8993 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
8994 return;
8995 if ( ! (
8996 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
8997 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8998 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8999 (mddev->external == 0 && mddev->safemode == 1) ||
9000 (mddev->safemode == 2
9001 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
9002 ))
9003 return;
9004
9005 if (mddev_trylock(mddev)) {
9006 int spares = 0;
9007 bool try_set_sync = mddev->safemode != 0;
9008
9009 if (!mddev->external && mddev->safemode == 1)
9010 mddev->safemode = 0;
9011
9012 if (mddev->ro) {
9013 struct md_rdev *rdev;
9014 if (!mddev->external && mddev->in_sync)
9015 /* 'Blocked' flag not needed as failed devices
9016  * will be recorded if array switched to read/write.
9017  * Leaving it set will prevent the device
9018  * from being removed.
9019  */
9020 rdev_for_each(rdev, mddev)
9021 clear_bit(Blocked, &rdev->flags);
9022 /* On a read-only array we can:
9023  * - remove failed devices
9024  * - add already-in_sync devices if the array itself
9025  *   is in-sync.
9026  * As we only add devices that are already in-sync,
9027  * we can activate the spares immediately.
9028  */
9029 remove_and_add_spares(mddev, NULL);
9030 /* There is no thread, but we need to call
9031  * ->spare_active and clear saved_raid_disk
9032  */
9033 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
9034 md_reap_sync_thread(mddev);
9035 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9036 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9037 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
9038 goto unlock;
9039 }
9040
9041 if (mddev_is_clustered(mddev)) {
9042 struct md_rdev *rdev;
9043 /* kick the device if another node issued a
9044  * remove disk.
9045  */
9046 rdev_for_each(rdev, mddev) {
9047 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
9048 rdev->raid_disk < 0)
9049 md_kick_rdev_from_array(rdev);
9050 }
9051 }
9052
9053 if (try_set_sync && !mddev->external && !mddev->in_sync) {
9054 spin_lock(&mddev->lock);
9055 set_in_sync(mddev);
9056 spin_unlock(&mddev->lock);
9057 }
9058
9059 if (mddev->sb_flags)
9060 md_update_sb(mddev, 0);
9061
9062 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
9063 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
9064 /* resync/recovery still happening */
9065 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9066 goto unlock;
9067 }
9068 if (mddev->sync_thread) {
9069 md_reap_sync_thread(mddev);
9070 goto unlock;
9071 }
9072
9073 /* Set RUNNING before clearing NEEDED to avoid
9074  * any transients in the value of "sync_action". */
9075 mddev->curr_resync_completed = 0;
9076 spin_lock(&mddev->lock);
9077 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9078 spin_unlock(&mddev->lock);
9079
9080 /* Clear some bits that don't mean anything, but
9081  * might be left set */
9082 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
9083 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9084
9085 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9086 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
9087 goto not_running;
9088
9089 /* no recovery is running.
9090  * remove any failed drives, then
9091  * add spares if possible.
9092  * Spares are also removed and re-added, to allow
9093  * the personality to fail the re-add.
9094  */
9095 if (mddev->reshape_position != MaxSector) {
9096 if (mddev->pers->check_reshape == NULL ||
9097 mddev->pers->check_reshape(mddev) != 0)
9098 /* Cannot proceed */
9099 goto not_running;
9100 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9101 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9102 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
9103 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9104 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9105 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9106 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9107 } else if (mddev->recovery_cp < MaxSector) {
9108 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9109 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9110 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
9111 /* nothing to be done ... */
9112 goto not_running;
9113
9114 if (mddev->pers->sync_request) {
9115 if (spares) {
9116 /* We are adding a device or devices to an array
9117  * which has the bitmap stored on all devices.
9118  * So make sure all bitmap pages get written
9119  */
9120 md_bitmap_write_all(mddev->bitmap);
9121 }
9122 INIT_WORK(&mddev->del_work, md_start_sync);
9123 queue_work(md_misc_wq, &mddev->del_work);
9124 goto unlock;
9125 }
9126 not_running:
9127 if (!mddev->sync_thread) {
9128 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9129 wake_up(&resync_wait);
9130 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9131 &mddev->recovery))
9132 if (mddev->sysfs_action)
9133 sysfs_notify_dirent_safe(mddev->sysfs_action);
9134 }
9135 unlock:
9136 wake_up(&mddev->sb_wait);
9137 mddev_unlock(mddev);
9138 }
9139 }
9140 EXPORT_SYMBOL(md_check_recovery);
9141
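/*
 * md_reap_sync_thread() - clean up after the sync thread has finished:
 * unregister it, activate any spares, complete a pending reshape, write
 * out the superblocks and clear the MD_RECOVERY_* state bits.
 */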
9142 void md_reap_sync_thread(struct mddev *mddev)
9143 {
9144 struct md_rdev *rdev;
9145 sector_t old_dev_sectors = mddev->dev_sectors;
9146 bool is_reshaped = false;
9147
9148 /* resync has finished, collect result */
9149 md_unregister_thread(&mddev->sync_thread);
9150 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9151 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
9152 mddev->degraded != mddev->raid_disks) {
9153 /* resync/recovery completed successfully */
9154 /* activate any spares */
9155 if (mddev->pers->spare_active(mddev)) {
9156 sysfs_notify(&mddev->kobj, NULL,
9157 "degraded");
9158 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9159 }
9160 }
9161 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9162 mddev->pers->finish_reshape) {
9163 mddev->pers->finish_reshape(mddev);
9164 if (mddev_is_clustered(mddev))
9165 is_reshaped = true;
9166 }
9167
9168 /* If array is no longer degraded, then any saved_raid_disk
9169  * information must be scrapped.
9170  */
9171 if (!mddev->degraded)
9172 rdev_for_each(rdev, mddev)
9173 rdev->saved_raid_disk = -1;
9174
9175 md_update_sb(mddev, 1);
9176
9177 /* MD_SB_CHANGE_PENDING should be cleared by md_update_sb, so we can
9178  * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by clustered raid */
9179 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
9180 md_cluster_ops->resync_finish(mddev);
9181 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9182 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9183 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9184 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9185 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9186 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9187 /*
9188  * We call md_cluster_ops->update_size here because sync_size could
9189  * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
9190  * so it is time to update size across cluster.
9191  */
9192 if (mddev_is_clustered(mddev) && is_reshaped
9193 && !test_bit(MD_CLOSING, &mddev->flags))
9194 md_cluster_ops->update_size(mddev, old_dev_sectors);
9195 wake_up(&resync_wait);
9196 /* flag recovery needed just to double check */
9197 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9198 sysfs_notify_dirent_safe(mddev->sysfs_action);
9199 md_new_event(mddev);
9200 if (mddev->event_work.func)
9201 queue_work(md_misc_wq, &mddev->event_work);
9202 }
9203 EXPORT_SYMBOL(md_reap_sync_thread);
9204
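/*
 * Wait (for at most five seconds) until the rdev is no longer Blocked or
 * BlockedBadBlocks, then drop the pending reference held by the caller.
 */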
9205 void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
9206 {
9207 sysfs_notify_dirent_safe(rdev->sysfs_state);
9208 wait_event_timeout(rdev->blocked_wait,
9209 !test_bit(Blocked, &rdev->flags) &&
9210 !test_bit(BlockedBadBlocks, &rdev->flags),
9211 msecs_to_jiffies(5000));
9212 rdev_dec_pending(rdev, mddev);
9213 }
9214 EXPORT_SYMBOL(md_wait_for_blocked_rdev);
9215
9216 void md_finish_reshape(struct mddev *mddev)
9217 {
9218 /* called by personality module when reshape completes. */
9219 struct md_rdev *rdev;
9220
9221 rdev_for_each(rdev, mddev) {
9222 if (rdev->data_offset > rdev->new_data_offset)
9223 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
9224 else
9225 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
9226 rdev->data_offset = rdev->new_data_offset;
9227 }
9228 }
9229 EXPORT_SYMBOL(md_finish_reshape);
9230
9231 /* Bad block management */
9232
9233 /* Returns 1 on success, 0 on failure */
9234 int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9235 int is_new)
9236 {
9237 struct mddev *mddev = rdev->mddev;
9238 int rv;
9239 if (is_new)
9240 s += rdev->new_data_offset;
9241 else
9242 s += rdev->data_offset;
9243 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
9244 if (rv == 0) {
9245 /* Make sure they get written out promptly */
9246 if (test_bit(ExternalBbl, &rdev->flags))
9247 sysfs_notify(&rdev->kobj, NULL,
9248 "unacknowledged_bad_blocks");
9249 sysfs_notify_dirent_safe(rdev->sysfs_state);
9250 set_mask_bits(&mddev->sb_flags, 0,
9251 BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
9252 md_wakeup_thread(rdev->mddev->thread);
9253 return 1;
9254 } else
9255 return 0;
9256 }
9257 EXPORT_SYMBOL_GPL(rdev_set_badblocks);
9258
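/*
 * Clear a range from the rdev's bad-block list, notifying sysfs on success
 * when an external bad-block log is in use.  Returns the badblocks_clear()
 * result.
 */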
9259 int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9260 int is_new)
9261 {
9262 int rv;
9263 if (is_new)
9264 s += rdev->new_data_offset;
9265 else
9266 s += rdev->data_offset;
9267 rv = badblocks_clear(&rdev->badblocks, s, sectors);
9268 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
9269 sysfs_notify(&rdev->kobj, NULL, "bad_blocks");
9270 return rv;
9271 }
9272 EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
9273
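/*
 * Reboot notifier: quiesce writes on every array that can be locked so the
 * superblocks are marked clean before the machine goes down, and delay the
 * reboot briefly if any array was touched.
 */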
9274 static int md_notify_reboot(struct notifier_block *this,
9275 unsigned long code, void *x)
9276 {
9277 struct list_head *tmp;
9278 struct mddev *mddev;
9279 int need_delay = 0;
9280
9281 for_each_mddev(mddev, tmp) {
9282 if (mddev_trylock(mddev)) {
9283 if (mddev->pers)
9284 __md_stop_writes(mddev);
9285 if (mddev->persistent)
9286 mddev->safemode = 2;
9287 mddev_unlock(mddev);
9288 }
9289 need_delay = 1;
9290 }
9291 /*
9292  * certain more exotic SCSI devices are known to be
9293  * volatile wrt too early system reboots. While the
9294  * right place to handle this issue is the given
9295  * driver, we do want to have a safe RAID driver ...
9296  */
9297 if (need_delay)
9298 mdelay(1000*1);
9299
9300 return NOTIFY_DONE;
9301 }
9302
9303 static struct notifier_block md_notifier = {
9304 .notifier_call = md_notify_reboot,
9305 .next = NULL,
9306 .priority = INT_MAX,
9307 };
9308
9309 static void md_geninit(void)
9310 {
9311 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
9312
9313 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
9314 }
9315
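/*
 * Module initialisation: create the md workqueues, register the "md" and
 * "mdp" block majors and their regions, the reboot notifier and the raid
 * sysctl table.
 */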
9316 static int __init md_init(void)
9317 {
9318 int ret = -ENOMEM;
9319
9320 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
9321 if (!md_wq)
9322 goto err_wq;
9323
9324 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
9325 if (!md_misc_wq)
9326 goto err_misc_wq;
9327
9328 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
9329 goto err_md;
9330
9331 if ((ret = register_blkdev(0, "mdp")) < 0)
9332 goto err_mdp;
9333 mdp_major = ret;
9334
9335 blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
9336 md_probe, NULL, NULL);
9337 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
9338 md_probe, NULL, NULL);
9339
9340 register_reboot_notifier(&md_notifier);
9341 raid_table_header = register_sysctl_table(raid_root_table);
9342
9343 md_geninit();
9344 return 0;
9345
9346 err_mdp:
9347 unregister_blkdev(MD_MAJOR, "md");
9348 err_md:
9349 destroy_workqueue(md_misc_wq);
9350 err_misc_wq:
9351 destroy_workqueue(md_wq);
9352 err_wq:
9353 return ret;
9354 }
9355
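/*
 * check_sb_changes() - apply superblock changes made by another cluster
 * node: resize, device role changes (activation or failure), raid_disks
 * updates and remote reshape progress.
 */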
9356 static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
9357 {
9358 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
9359 struct md_rdev *rdev2;
9360 int role, ret;
9361 char b[BDEVNAME_SIZE];
9362
9363 /*
9364  * If the size was changed on another node, we need to
9365  * resize here as well.
9366  */
9367 if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
9368 ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
9369 if (ret)
9370 pr_info("md-cluster: resize failed\n");
9371 else
9372 md_bitmap_update_sb(mddev->bitmap);
9373 }
9374
9375 /* Check for change of roles in the active devices */
9376 rdev_for_each(rdev2, mddev) {
9377 if (test_bit(Faulty, &rdev2->flags))
9378 continue;
9379
9380 /* Check if the roles changed */
9381 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
9382
9383 if (test_bit(Candidate, &rdev2->flags)) {
9384 if (role == 0xfffe) {
9385 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
9386 md_kick_rdev_from_array(rdev2);
9387 continue;
9388 }
9389 else
9390 clear_bit(Candidate, &rdev2->flags);
9391 }
9392
9393 if (role != rdev2->raid_disk) {
9394 /* The device was activated on another node,
9395  * unless a reshape is currently in progress.
9396  */
9397 if (rdev2->raid_disk == -1 && role != 0xffff &&
9398 !(le32_to_cpu(sb->feature_map) &
9399 MD_FEATURE_RESHAPE_ACTIVE)) {
9400 rdev2->saved_raid_disk = role;
9401 ret = remove_and_add_spares(mddev, rdev2);
9402 pr_info("Activated spare: %s\n",
9403 bdevname(rdev2->bdev,b));
9404 /* wakeup mddev->thread here, so array could
9405  * perform resync with the new activated disk */
9406 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9407 md_wakeup_thread(mddev->thread);
9408 }
9409 /* device faulty
9410  * We just want to do the minimum to mark the disk
9411  * as faulty. The recovery is performed by the
9412  * one who initiated the error.
9413  */
9414 if ((role == 0xfffe) || (role == 0xfffd)) {
9415 md_error(mddev, rdev2);
9416 clear_bit(Blocked, &rdev2->flags);
9417 }
9418 }
9419 }
9420
9421 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
9422 update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
9423
9424 /*
9425  * Since mddev->delta_disks has already been updated in
9426  * update_raid_disks(), it is time to check for a reshape.
9427  */
9428 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9429 (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9430 /*
9431  * A reshape is happening on the remote node, so update
9432  * reshape_position and call start_reshape.
9433  */
9434 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
9435 if (mddev->pers->update_reshape_pos)
9436 mddev->pers->update_reshape_pos(mddev);
9437 if (mddev->pers->start_reshape)
9438 mddev->pers->start_reshape(mddev);
9439 } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9440 mddev->reshape_position != MaxSector &&
9441 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9442 /* remove reshape */
9443 mddev->reshape_position = MaxSector;
9444 if (mddev->pers->update_reshape_pos)
9445 mddev->pers->update_reshape_pos(mddev);
9446 }
9447
9448 /* Finally set the event count to be up to date */
9449 mddev->events = le64_to_cpu(sb->events);
9450 }
9451
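/*
 * Re-read the superblock of one rdev from disk; if the read or validation
 * fails, the previous superblock page is restored.
 */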
9452 static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
9453 {
9454 int err;
9455 struct page *swapout = rdev->sb_page;
9456 struct mdp_superblock_1 *sb;
9457
9458 /* Keep the old sb page in 'swapout' so it can be
9459  * restored if re-reading the superblock fails.
9460  */
9461 rdev->sb_page = NULL;
9462 err = alloc_disk_sb(rdev);
9463 if (err == 0) {
9464 ClearPageUptodate(rdev->sb_page);
9465 rdev->sb_loaded = 0;
9466 err = super_types[mddev->major_version].
9467 load_super(rdev, NULL, mddev->minor_version);
9468 }
9469 if (err < 0) {
9470 pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
9471 __func__, __LINE__, rdev->desc_nr, err);
9472 if (rdev->sb_page)
9473 put_page(rdev->sb_page);
9474 rdev->sb_page = swapout;
9475 rdev->sb_loaded = 1;
9476 return err;
9477 }
9478
9479 sb = page_address(rdev->sb_page);
9480
9481 /* Pick up any recovery offset recorded in the superblock,
9482  * so the per-device recovery position survives the reload.
9483  */
9484 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
9485 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
9486
9487 /* The other node finished recovery, call spare_active to set
9488  * device In_sync and mddev->degraded
9489  */
9490 if (rdev->recovery_offset == MaxSector &&
9491 !test_bit(In_sync, &rdev->flags) &&
9492 mddev->pers->spare_active(mddev))
9493 sysfs_notify(&mddev->kobj, NULL, "degraded");
9494
9495 put_page(swapout);
9496 return 0;
9497 }
9498
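/*
 * md_reload_sb() - called when another cluster node updated the metadata:
 * re-read the superblock of rdev @nr, apply any changes, then refresh the
 * remaining non-faulty rdevs.
 */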
9499 void md_reload_sb(struct mddev *mddev, int nr)
9500 {
9501 struct md_rdev *rdev;
9502 int err;
9503
9504 /* Find the rdev with descriptor number nr */
9505 rdev_for_each_rcu(rdev, mddev) {
9506 if (rdev->desc_nr == nr)
9507 break;
9508 }
9509
9510 if (!rdev || rdev->desc_nr != nr) {
9511 pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
9512 return;
9513 }
9514
9515 err = read_rdev(mddev, rdev);
9516 if (err < 0)
9517 return;
9518
9519 check_sb_changes(mddev, rdev);
9520
9521 /* Read all rdev's to update recovery_offset */
9522 rdev_for_each_rcu(rdev, mddev) {
9523 if (!test_bit(Faulty, &rdev->flags))
9524 read_rdev(mddev, rdev);
9525 }
9526 }
9527 EXPORT_SYMBOL(md_reload_sb);
9528
9529 #ifndef MODULE
9530
9531 /*
9532  * Searches all registered partitions for autorun RAID arrays
9533  * at boot time.
9534  */
9535
9536 static DEFINE_MUTEX(detected_devices_mutex);
9537 static LIST_HEAD(all_detected_devices);
9538 struct detected_devices_node {
9539 struct list_head list;
9540 dev_t dev;
9541 };
9542
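/*
 * Remember a device detected at boot so autostart_arrays() can try to
 * assemble it into an array later.
 */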
9543 void md_autodetect_dev(dev_t dev)
9544 {
9545 struct detected_devices_node *node_detected_dev;
9546
9547 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
9548 if (node_detected_dev) {
9549 node_detected_dev->dev = dev;
9550 mutex_lock(&detected_devices_mutex);
9551 list_add_tail(&node_detected_dev->list, &all_detected_devices);
9552 mutex_unlock(&detected_devices_mutex);
9553 }
9554 }
9555
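/*
 * Import every device queued by md_autodetect_dev() and hand the resulting
 * list to autorun_devices() for assembly.
 */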
9556 static void autostart_arrays(int part)
9557 {
9558 struct md_rdev *rdev;
9559 struct detected_devices_node *node_detected_dev;
9560 dev_t dev;
9561 int i_scanned, i_passed;
9562
9563 i_scanned = 0;
9564 i_passed = 0;
9565
9566 pr_info("md: Autodetecting RAID arrays.\n");
9567
9568 mutex_lock(&detected_devices_mutex);
9569 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
9570 i_scanned++;
9571 node_detected_dev = list_entry(all_detected_devices.next,
9572 struct detected_devices_node, list);
9573 list_del(&node_detected_dev->list);
9574 dev = node_detected_dev->dev;
9575 kfree(node_detected_dev);
9576 mutex_unlock(&detected_devices_mutex);
9577 rdev = md_import_device(dev, 0, 90);
9578 mutex_lock(&detected_devices_mutex);
9579 if (IS_ERR(rdev))
9580 continue;
9581
9582 if (test_bit(Faulty, &rdev->flags))
9583 continue;
9584
9585 set_bit(AutoDetected, &rdev->flags);
9586 list_add(&rdev->same_set, &pending_raid_disks);
9587 i_passed++;
9588 }
9589 mutex_unlock(&detected_devices_mutex);
9590
9591 pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);
9592
9593 autorun_devices(part);
9594 }
9595
9596 #endif
9597
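/*
 * Module exit: unregister the block regions, majors, reboot notifier and
 * sysctl table, wake any waiters on md_event_waiters, then tear down all
 * arrays and the workqueues.
 */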
9598 static __exit void md_exit(void)
9599 {
9600 struct mddev *mddev;
9601 struct list_head *tmp;
9602 int delay = 1;
9603
9604 blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
9605 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
9606
9607 unregister_blkdev(MD_MAJOR,"md");
9608 unregister_blkdev(mdp_major, "mdp");
9609 unregister_reboot_notifier(&md_notifier);
9610 unregister_sysctl_table(raid_table_header);
9611
9612 /* We cannot unload the module while some process is
9613  * waiting for us in select() or poll() - wake them up
9614  */
9615 md_unloading = 1;
9616 while (waitqueue_active(&md_event_waiters)) {
9617 /* not safe to leave yet */
9618 wake_up(&md_event_waiters);
9619 msleep(delay);
9620 delay += delay;
9621 }
9622 remove_proc_entry("mdstat", NULL);
9623
9624 for_each_mddev(mddev, tmp) {
9625 export_array(mddev);
9626 mddev->ctime = 0;
9627 mddev->hold_active = 0;
9628 /*
9629  * for_each_mddev() will call mddev_put() at the end of each
9630  * iteration.  As the mddev is now fully clear, this will
9631  * schedule the mddev for destruction by a workqueue, and the
9632  * destroy_workqueue() below will wait for that to complete.
9633  */
9634 }
9635 destroy_workqueue(md_misc_wq);
9636 destroy_workqueue(md_wq);
9637 }
9638
9639 subsys_initcall(md_init);
9640 module_exit(md_exit)
9641
9642 static int get_ro(char *buffer, const struct kernel_param *kp)
9643 {
9644 return sprintf(buffer, "%d", start_readonly);
9645 }
9646 static int set_ro(const char *val, const struct kernel_param *kp)
9647 {
9648 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
9649 }
9650
9651 module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
9652 module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
9653 module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
9654 module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
9655
9656 MODULE_LICENSE("GPL");
9657 MODULE_DESCRIPTION("MD RAID framework");
9658 MODULE_ALIAS("md");
9659 MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);