root/drivers/md/md-multipath.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. multipath_map
  2. multipath_reschedule_retry
  3. multipath_end_bh_io
  4. multipath_end_request
  5. multipath_make_request
  6. multipath_status
  7. multipath_congested
  8. multipath_error
  9. print_multipath_conf
  10. multipath_add_disk
  11. multipath_remove_disk
  12. multipathd
  13. multipath_size
  14. multipath_run
  15. multipath_free
  16. multipath_init
  17. multipath_exit

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * multipath.c : Multiple Devices driver for Linux
   4  *
   5  * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat
   6  *
   7  * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
   8  *
   9  * MULTIPATH management functions.
  10  *
  11  * derived from raid1.c.
  12  */
  13 
  14 #include <linux/blkdev.h>
  15 #include <linux/module.h>
  16 #include <linux/raid/md_u.h>
  17 #include <linux/seq_file.h>
  18 #include <linux/slab.h>
  19 #include "md.h"
  20 #include "md-multipath.h"
  21 
  22 #define MAX_WORK_PER_DISK 128
  23 
  24 #define NR_RESERVED_BUFS        32
  25 
  26 static int multipath_map (struct mpconf *conf)
  27 {
  28         int i, disks = conf->raid_disks;
  29 
  30         /*
  31          * Later we do read balancing on the read side
  32          * now we use the first available disk.
  33          */
  34 
  35         rcu_read_lock();
  36         for (i = 0; i < disks; i++) {
  37                 struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
  38                 if (rdev && test_bit(In_sync, &rdev->flags) &&
  39                     !test_bit(Faulty, &rdev->flags)) {
  40                         atomic_inc(&rdev->nr_pending);
  41                         rcu_read_unlock();
  42                         return i;
  43                 }
  44         }
  45         rcu_read_unlock();
  46 
  47         pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n");
  48         return (-1);
  49 }
  50 
  51 static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
  52 {
  53         unsigned long flags;
  54         struct mddev *mddev = mp_bh->mddev;
  55         struct mpconf *conf = mddev->private;
  56 
  57         spin_lock_irqsave(&conf->device_lock, flags);
  58         list_add(&mp_bh->retry_list, &conf->retry_list);
  59         spin_unlock_irqrestore(&conf->device_lock, flags);
  60         md_wakeup_thread(mddev->thread);
  61 }
  62 
  63 /*
  64  * multipath_end_bh_io() is called when we have finished servicing a multipathed
  65  * operation and are ready to return a success/failure code to the buffer
  66  * cache layer.
  67  */
  68 static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
  69 {
  70         struct bio *bio = mp_bh->master_bio;
  71         struct mpconf *conf = mp_bh->mddev->private;
  72 
  73         bio->bi_status = status;
  74         bio_endio(bio);
  75         mempool_free(mp_bh, &conf->pool);
  76 }
  77 
  78 static void multipath_end_request(struct bio *bio)
  79 {
  80         struct multipath_bh *mp_bh = bio->bi_private;
  81         struct mpconf *conf = mp_bh->mddev->private;
  82         struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
  83 
  84         if (!bio->bi_status)
  85                 multipath_end_bh_io(mp_bh, 0);
  86         else if (!(bio->bi_opf & REQ_RAHEAD)) {
  87                 /*
  88                  * oops, IO error:
  89                  */
  90                 char b[BDEVNAME_SIZE];
  91                 md_error (mp_bh->mddev, rdev);
  92                 pr_info("multipath: %s: rescheduling sector %llu\n",
  93                         bdevname(rdev->bdev,b),
  94                         (unsigned long long)bio->bi_iter.bi_sector);
  95                 multipath_reschedule_retry(mp_bh);
  96         } else
  97                 multipath_end_bh_io(mp_bh, bio->bi_status);
  98         rdev_dec_pending(rdev, conf->mddev);
  99 }
 100 
 101 static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
 102 {
 103         struct mpconf *conf = mddev->private;
 104         struct multipath_bh * mp_bh;
 105         struct multipath_info *multipath;
 106 
 107         if (unlikely(bio->bi_opf & REQ_PREFLUSH)
 108             && md_flush_request(mddev, bio))
 109                 return true;
 110 
 111         mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
 112 
 113         mp_bh->master_bio = bio;
 114         mp_bh->mddev = mddev;
 115 
 116         mp_bh->path = multipath_map(conf);
 117         if (mp_bh->path < 0) {
 118                 bio_io_error(bio);
 119                 mempool_free(mp_bh, &conf->pool);
 120                 return true;
 121         }
 122         multipath = conf->multipaths + mp_bh->path;
 123 
 124         bio_init(&mp_bh->bio, NULL, 0);
 125         __bio_clone_fast(&mp_bh->bio, bio);
 126 
 127         mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
 128         bio_set_dev(&mp_bh->bio, multipath->rdev->bdev);
 129         mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT;
 130         mp_bh->bio.bi_end_io = multipath_end_request;
 131         mp_bh->bio.bi_private = mp_bh;
 132         mddev_check_writesame(mddev, &mp_bh->bio);
 133         mddev_check_write_zeroes(mddev, &mp_bh->bio);
 134         generic_make_request(&mp_bh->bio);
 135         return true;
 136 }
 137 
 138 static void multipath_status(struct seq_file *seq, struct mddev *mddev)
 139 {
 140         struct mpconf *conf = mddev->private;
 141         int i;
 142 
 143         seq_printf (seq, " [%d/%d] [", conf->raid_disks,
 144                     conf->raid_disks - mddev->degraded);
 145         rcu_read_lock();
 146         for (i = 0; i < conf->raid_disks; i++) {
 147                 struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
 148                 seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
 149         }
 150         rcu_read_unlock();
 151         seq_putc(seq, ']');
 152 }
 153 
 154 static int multipath_congested(struct mddev *mddev, int bits)
 155 {
 156         struct mpconf *conf = mddev->private;
 157         int i, ret = 0;
 158 
 159         rcu_read_lock();
 160         for (i = 0; i < mddev->raid_disks ; i++) {
 161                 struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
 162                 if (rdev && !test_bit(Faulty, &rdev->flags)) {
 163                         struct request_queue *q = bdev_get_queue(rdev->bdev);
 164 
 165                         ret |= bdi_congested(q->backing_dev_info, bits);
 166                         /* Just like multipath_map, we just check the
 167                          * first available device
 168                          */
 169                         break;
 170                 }
 171         }
 172         rcu_read_unlock();
 173         return ret;
 174 }
 175 
 176 /*
 177  * Careful, this can execute in IRQ contexts as well!
 178  */
 179 static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
 180 {
 181         struct mpconf *conf = mddev->private;
 182         char b[BDEVNAME_SIZE];
 183 
 184         if (conf->raid_disks - mddev->degraded <= 1) {
 185                 /*
 186                  * Uh oh, we can do nothing if this is our last path, but
 187                  * first check if this is a queued request for a device
 188                  * which has just failed.
 189                  */
 190                 pr_warn("multipath: only one IO path left and IO error.\n");
 191                 /* leave it active... it's all we have */
 192                 return;
 193         }
 194         /*
 195          * Mark disk as unusable
 196          */
 197         if (test_and_clear_bit(In_sync, &rdev->flags)) {
 198                 unsigned long flags;
 199                 spin_lock_irqsave(&conf->device_lock, flags);
 200                 mddev->degraded++;
 201                 spin_unlock_irqrestore(&conf->device_lock, flags);
 202         }
 203         set_bit(Faulty, &rdev->flags);
 204         set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
 205         pr_err("multipath: IO failure on %s, disabling IO path.\n"
 206                "multipath: Operation continuing on %d IO paths.\n",
 207                bdevname(rdev->bdev, b),
 208                conf->raid_disks - mddev->degraded);
 209 }
 210 
 211 static void print_multipath_conf (struct mpconf *conf)
 212 {
 213         int i;
 214         struct multipath_info *tmp;
 215 
 216         pr_debug("MULTIPATH conf printout:\n");
 217         if (!conf) {
 218                 pr_debug("(conf==NULL)\n");
 219                 return;
 220         }
 221         pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
 222                  conf->raid_disks);
 223 
 224         for (i = 0; i < conf->raid_disks; i++) {
 225                 char b[BDEVNAME_SIZE];
 226                 tmp = conf->multipaths + i;
 227                 if (tmp->rdev)
 228                         pr_debug(" disk%d, o:%d, dev:%s\n",
 229                                  i,!test_bit(Faulty, &tmp->rdev->flags),
 230                                  bdevname(tmp->rdev->bdev,b));
 231         }
 232 }
 233 
 234 static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 235 {
 236         struct mpconf *conf = mddev->private;
 237         int err = -EEXIST;
 238         int path;
 239         struct multipath_info *p;
 240         int first = 0;
 241         int last = mddev->raid_disks - 1;
 242 
 243         if (rdev->raid_disk >= 0)
 244                 first = last = rdev->raid_disk;
 245 
 246         print_multipath_conf(conf);
 247 
 248         for (path = first; path <= last; path++)
 249                 if ((p=conf->multipaths+path)->rdev == NULL) {
 250                         disk_stack_limits(mddev->gendisk, rdev->bdev,
 251                                           rdev->data_offset << 9);
 252 
 253                         err = md_integrity_add_rdev(rdev, mddev);
 254                         if (err)
 255                                 break;
 256                         spin_lock_irq(&conf->device_lock);
 257                         mddev->degraded--;
 258                         rdev->raid_disk = path;
 259                         set_bit(In_sync, &rdev->flags);
 260                         spin_unlock_irq(&conf->device_lock);
 261                         rcu_assign_pointer(p->rdev, rdev);
 262                         err = 0;
 263                         break;
 264                 }
 265 
 266         print_multipath_conf(conf);
 267 
 268         return err;
 269 }
 270 
 271 static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 272 {
 273         struct mpconf *conf = mddev->private;
 274         int err = 0;
 275         int number = rdev->raid_disk;
 276         struct multipath_info *p = conf->multipaths + number;
 277 
 278         print_multipath_conf(conf);
 279 
 280         if (rdev == p->rdev) {
 281                 if (test_bit(In_sync, &rdev->flags) ||
 282                     atomic_read(&rdev->nr_pending)) {
 283                         pr_warn("hot-remove-disk, slot %d is identified but is still operational!\n", number);
 284                         err = -EBUSY;
 285                         goto abort;
 286                 }
 287                 p->rdev = NULL;
 288                 if (!test_bit(RemoveSynchronized, &rdev->flags)) {
 289                         synchronize_rcu();
 290                         if (atomic_read(&rdev->nr_pending)) {
 291                                 /* lost the race, try later */
 292                                 err = -EBUSY;
 293                                 p->rdev = rdev;
 294                                 goto abort;
 295                         }
 296                 }
 297                 err = md_integrity_register(mddev);
 298         }
 299 abort:
 300 
 301         print_multipath_conf(conf);
 302         return err;
 303 }
 304 
 305 /*
 306  * This is a kernel thread which:
 307  *
 308  *      1.      Retries failed read operations on working multipaths.
 309  *      2.      Updates the raid superblock when problems encounter.
 310  *      3.      Performs writes following reads for array syncronising.
 311  */
 312 
 313 static void multipathd(struct md_thread *thread)
 314 {
 315         struct mddev *mddev = thread->mddev;
 316         struct multipath_bh *mp_bh;
 317         struct bio *bio;
 318         unsigned long flags;
 319         struct mpconf *conf = mddev->private;
 320         struct list_head *head = &conf->retry_list;
 321 
 322         md_check_recovery(mddev);
 323         for (;;) {
 324                 char b[BDEVNAME_SIZE];
 325                 spin_lock_irqsave(&conf->device_lock, flags);
 326                 if (list_empty(head))
 327                         break;
 328                 mp_bh = list_entry(head->prev, struct multipath_bh, retry_list);
 329                 list_del(head->prev);
 330                 spin_unlock_irqrestore(&conf->device_lock, flags);
 331 
 332                 bio = &mp_bh->bio;
 333                 bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;
 334 
 335                 if ((mp_bh->path = multipath_map (conf))<0) {
 336                         pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
 337                                bio_devname(bio, b),
 338                                (unsigned long long)bio->bi_iter.bi_sector);
 339                         multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
 340                 } else {
 341                         pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
 342                                bio_devname(bio, b),
 343                                (unsigned long long)bio->bi_iter.bi_sector);
 344                         *bio = *(mp_bh->master_bio);
 345                         bio->bi_iter.bi_sector +=
 346                                 conf->multipaths[mp_bh->path].rdev->data_offset;
 347                         bio_set_dev(bio, conf->multipaths[mp_bh->path].rdev->bdev);
 348                         bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
 349                         bio->bi_end_io = multipath_end_request;
 350                         bio->bi_private = mp_bh;
 351                         generic_make_request(bio);
 352                 }
 353         }
 354         spin_unlock_irqrestore(&conf->device_lock, flags);
 355 }
 356 
 357 static sector_t multipath_size(struct mddev *mddev, sector_t sectors, int raid_disks)
 358 {
 359         WARN_ONCE(sectors || raid_disks,
 360                   "%s does not support generic reshape\n", __func__);
 361 
 362         return mddev->dev_sectors;
 363 }
 364 
 365 static int multipath_run (struct mddev *mddev)
 366 {
 367         struct mpconf *conf;
 368         int disk_idx;
 369         struct multipath_info *disk;
 370         struct md_rdev *rdev;
 371         int working_disks;
 372         int ret;
 373 
 374         if (md_check_no_bitmap(mddev))
 375                 return -EINVAL;
 376 
 377         if (mddev->level != LEVEL_MULTIPATH) {
 378                 pr_warn("multipath: %s: raid level not set to multipath IO (%d)\n",
 379                         mdname(mddev), mddev->level);
 380                 goto out;
 381         }
 382         /*
 383          * copy the already verified devices into our private MULTIPATH
 384          * bookkeeping area. [whatever we allocate in multipath_run(),
 385          * should be freed in multipath_free()]
 386          */
 387 
 388         conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL);
 389         mddev->private = conf;
 390         if (!conf)
 391                 goto out;
 392 
 393         conf->multipaths = kcalloc(mddev->raid_disks,
 394                                    sizeof(struct multipath_info),
 395                                    GFP_KERNEL);
 396         if (!conf->multipaths)
 397                 goto out_free_conf;
 398 
 399         working_disks = 0;
 400         rdev_for_each(rdev, mddev) {
 401                 disk_idx = rdev->raid_disk;
 402                 if (disk_idx < 0 ||
 403                     disk_idx >= mddev->raid_disks)
 404                         continue;
 405 
 406                 disk = conf->multipaths + disk_idx;
 407                 disk->rdev = rdev;
 408                 disk_stack_limits(mddev->gendisk, rdev->bdev,
 409                                   rdev->data_offset << 9);
 410 
 411                 if (!test_bit(Faulty, &rdev->flags))
 412                         working_disks++;
 413         }
 414 
 415         conf->raid_disks = mddev->raid_disks;
 416         conf->mddev = mddev;
 417         spin_lock_init(&conf->device_lock);
 418         INIT_LIST_HEAD(&conf->retry_list);
 419 
 420         if (!working_disks) {
 421                 pr_warn("multipath: no operational IO paths for %s\n",
 422                         mdname(mddev));
 423                 goto out_free_conf;
 424         }
 425         mddev->degraded = conf->raid_disks - working_disks;
 426 
 427         ret = mempool_init_kmalloc_pool(&conf->pool, NR_RESERVED_BUFS,
 428                                         sizeof(struct multipath_bh));
 429         if (ret)
 430                 goto out_free_conf;
 431 
 432         mddev->thread = md_register_thread(multipathd, mddev,
 433                                            "multipath");
 434         if (!mddev->thread)
 435                 goto out_free_conf;
 436 
 437         pr_info("multipath: array %s active with %d out of %d IO paths\n",
 438                 mdname(mddev), conf->raid_disks - mddev->degraded,
 439                 mddev->raid_disks);
 440         /*
 441          * Ok, everything is just fine now
 442          */
 443         md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
 444 
 445         if (md_integrity_register(mddev))
 446                 goto out_free_conf;
 447 
 448         return 0;
 449 
 450 out_free_conf:
 451         mempool_exit(&conf->pool);
 452         kfree(conf->multipaths);
 453         kfree(conf);
 454         mddev->private = NULL;
 455 out:
 456         return -EIO;
 457 }
 458 
 459 static void multipath_free(struct mddev *mddev, void *priv)
 460 {
 461         struct mpconf *conf = priv;
 462 
 463         mempool_exit(&conf->pool);
 464         kfree(conf->multipaths);
 465         kfree(conf);
 466 }
 467 
 468 static struct md_personality multipath_personality =
 469 {
 470         .name           = "multipath",
 471         .level          = LEVEL_MULTIPATH,
 472         .owner          = THIS_MODULE,
 473         .make_request   = multipath_make_request,
 474         .run            = multipath_run,
 475         .free           = multipath_free,
 476         .status         = multipath_status,
 477         .error_handler  = multipath_error,
 478         .hot_add_disk   = multipath_add_disk,
 479         .hot_remove_disk= multipath_remove_disk,
 480         .size           = multipath_size,
 481         .congested      = multipath_congested,
 482 };
 483 
 484 static int __init multipath_init (void)
 485 {
 486         return register_md_personality (&multipath_personality);
 487 }
 488 
 489 static void __exit multipath_exit (void)
 490 {
 491         unregister_md_personality (&multipath_personality);
 492 }
 493 
 494 module_init(multipath_init);
 495 module_exit(multipath_exit);
 496 MODULE_LICENSE("GPL");
 497 MODULE_DESCRIPTION("simple multi-path personality for MD");
 498 MODULE_ALIAS("md-personality-7"); /* MULTIPATH */
 499 MODULE_ALIAS("md-multipath");
 500 MODULE_ALIAS("md-level--4");

/* [<][>][^][v][top][bottom][index][help] */