root/drivers/md/dm-zoned-reclaim.c

DEFINITIONS

This source file includes the following definitions:
  1. dmz_reclaim_align_wp
  2. dmz_reclaim_kcopy_end
  3. dmz_reclaim_copy
  4. dmz_reclaim_buf
  5. dmz_reclaim_seq_data
  6. dmz_reclaim_rnd_data
  7. dmz_reclaim_empty
  8. dmz_do_reclaim
  9. dmz_target_idle
  10. dmz_should_reclaim
  11. dmz_reclaim_work
  12. dmz_ctr_reclaim
  13. dmz_dtr_reclaim
  14. dmz_suspend_reclaim
  15. dmz_resume_reclaim
  16. dmz_reclaim_bio_acc
  17. dmz_schedule_reclaim

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 Western Digital Corporation or its affiliates.
 *
 * This file is released under the GPL.
 */

#include "dm-zoned.h"

#include <linux/module.h>

#define DM_MSG_PREFIX           "zoned reclaim"

struct dmz_reclaim {
        struct dmz_metadata     *metadata;
        struct dmz_dev          *dev;

        struct delayed_work     work;
        struct workqueue_struct *wq;

        struct dm_kcopyd_client *kc;
        struct dm_kcopyd_throttle kc_throttle;
        int                     kc_err;

        unsigned long           flags;

        /* Last target access time */
        unsigned long           atime;
};

/*
 * Reclaim state flags.
 */
enum {
        DMZ_RECLAIM_KCOPY,
};

/*
 * Number of seconds of target BIO inactivity to consider the target idle.
 */
#define DMZ_IDLE_PERIOD                 (10UL * HZ)

/*
 * Percentage of unmapped (free) random zones below which reclaim starts
 * even if the target is busy.
 */
#define DMZ_RECLAIM_LOW_UNMAP_RND       30

/*
 * Percentage of unmapped (free) random zones above which reclaim will
 * stop if the target is busy.
 */
#define DMZ_RECLAIM_HIGH_UNMAP_RND      50

/*
 * Align a sequential zone write pointer to chunk_block.
 */
static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone,
                                sector_t block)
{
        struct dmz_metadata *zmd = zrc->metadata;
        sector_t wp_block = zone->wp_block;
        unsigned int nr_blocks;
        int ret;

        if (wp_block == block)
                return 0;

        if (wp_block > block)
                return -EIO;

        /*
         * Zero out the space between the write
         * pointer and the requested position.
         */
        nr_blocks = block - wp_block;
        ret = blkdev_issue_zeroout(zrc->dev->bdev,
                                   dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block),
                                   dmz_blk2sect(nr_blocks), GFP_NOIO, 0);
        if (ret) {
                dmz_dev_err(zrc->dev,
                            "Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",
                            dmz_id(zmd, zone), (unsigned long long)wp_block,
                            (unsigned long long)block, nr_blocks, ret);
                dmz_check_bdev(zrc->dev);
                return ret;
        }

        zone->wp_block = block;

        return 0;
}
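
/*
 * Worked example of the arithmetic above (illustration only): dm-zoned
 * uses 4096-byte blocks, so dmz_blk2sect() converts blocks to 512-byte
 * sectors with a shift by 3. With the write pointer at block 100 and a
 * requested write position at block 160, the zeroout covers:
 *
 *      nr_blocks = 160 - 100 = 60 blocks
 *      start     = zone start sector + (100 << 3) = zone start + 800
 *      count     = 60 << 3 = 480 sectors
 */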

/*
 * dm_kcopyd_copy end notification.
 */
static void dmz_reclaim_kcopy_end(int read_err, unsigned long write_err,
                                  void *context)
{
        struct dmz_reclaim *zrc = context;

        if (read_err || write_err)
                zrc->kc_err = -EIO;
        else
                zrc->kc_err = 0;

        clear_bit_unlock(DMZ_RECLAIM_KCOPY, &zrc->flags);
        smp_mb__after_atomic();
        wake_up_bit(&zrc->flags, DMZ_RECLAIM_KCOPY);
}
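
/*
 * This callback pairs with the submission side in dmz_reclaim_copy():
 * the submitter sets DMZ_RECLAIM_KCOPY before calling dm_kcopyd_copy()
 * and sleeps in wait_on_bit_io(); the callback records the status,
 * clears the bit with release semantics and wakes the waiter. The bare
 * pattern, as a sketch (MY_BUSY and submit_async() are hypothetical):
 *
 *      // submitter
 *      set_bit(MY_BUSY, &flags);
 *      submit_async(..., my_end_fn, ctx);
 *      wait_on_bit_io(&flags, MY_BUSY, TASK_UNINTERRUPTIBLE);
 *
 *      // completion callback
 *      clear_bit_unlock(MY_BUSY, &flags);
 *      smp_mb__after_atomic();
 *      wake_up_bit(&flags, MY_BUSY);
 */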

/*
 * Copy valid blocks of src_zone into dst_zone.
 */
static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
                            struct dm_zone *src_zone, struct dm_zone *dst_zone)
{
        struct dmz_metadata *zmd = zrc->metadata;
        struct dmz_dev *dev = zrc->dev;
        struct dm_io_region src, dst;
        sector_t block = 0, end_block;
        sector_t nr_blocks;
        sector_t src_zone_block;
        sector_t dst_zone_block;
        unsigned long flags = 0;
        int ret;

        if (dmz_is_seq(src_zone))
                end_block = src_zone->wp_block;
        else
                end_block = dev->zone_nr_blocks;
        src_zone_block = dmz_start_block(zmd, src_zone);
        dst_zone_block = dmz_start_block(zmd, dst_zone);

        if (dmz_is_seq(dst_zone))
                set_bit(DM_KCOPYD_WRITE_SEQ, &flags);

        while (block < end_block) {
                if (dev->flags & DMZ_BDEV_DYING)
                        return -EIO;

                /* Get a valid region from the source zone */
                ret = dmz_first_valid_block(zmd, src_zone, &block);
                if (ret <= 0)
                        return ret;
                nr_blocks = ret;

                /*
                 * If we are writing to a sequential zone, writes must
                 * be issued sequentially, so zero out any hole between
                 * the write pointer and this valid region.
                 */
                if (dmz_is_seq(dst_zone)) {
                        ret = dmz_reclaim_align_wp(zrc, dst_zone, block);
                        if (ret)
                                return ret;
                }

                src.bdev = dev->bdev;
                src.sector = dmz_blk2sect(src_zone_block + block);
                src.count = dmz_blk2sect(nr_blocks);

                dst.bdev = dev->bdev;
                dst.sector = dmz_blk2sect(dst_zone_block + block);
                dst.count = src.count;

                /* Copy the valid region */
                set_bit(DMZ_RECLAIM_KCOPY, &zrc->flags);
                dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags,
                               dmz_reclaim_kcopy_end, zrc);

                /* Wait for copy to complete */
                wait_on_bit_io(&zrc->flags, DMZ_RECLAIM_KCOPY,
                               TASK_UNINTERRUPTIBLE);
                if (zrc->kc_err)
                        return zrc->kc_err;

                block += nr_blocks;
                if (dmz_is_seq(dst_zone))
                        dst_zone->wp_block = block;
        }

        return 0;
}
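
/*
 * dmz_first_valid_block() advances *block to the start of the next
 * valid extent and returns its length in blocks, so the loop above
 * walks the source zone one valid extent at a time. A rough in-memory
 * bitmap analogue (sketch only; the real implementation lives in
 * dm-zoned-metadata.c and works on the on-disk validity bitmaps):
 *
 *      static int first_valid_sketch(unsigned long *bmap,
 *                                    unsigned int nbits, sector_t *blk)
 *      {
 *              unsigned int s = find_next_bit(bmap, nbits, *blk);
 *
 *              if (s >= nbits)
 *                      return 0;       // no more valid blocks
 *              *blk = s;
 *              return find_next_zero_bit(bmap, nbits, s) - s;
 *      }
 */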

/*
 * Move valid blocks of dzone's buffer zone into dzone (after its write
 * pointer) and free the buffer zone.
 */
static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone)
{
        struct dm_zone *bzone = dzone->bzone;
        sector_t chunk_block = dzone->wp_block;
        struct dmz_metadata *zmd = zrc->metadata;
        int ret;

        dmz_dev_debug(zrc->dev,
                      "Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)",
                      dzone->chunk, dmz_id(zmd, bzone), dmz_weight(bzone),
                      dmz_id(zmd, dzone), dmz_weight(dzone));

        /* Flush the buffer zone into the data zone */
        ret = dmz_reclaim_copy(zrc, bzone, dzone);
        if (ret < 0)
                return ret;

        dmz_lock_flush(zmd);

        /* Validate copied blocks */
        ret = dmz_merge_valid_blocks(zmd, bzone, dzone, chunk_block);
        if (ret == 0) {
                /* Free the buffer zone */
                dmz_invalidate_blocks(zmd, bzone, 0, zrc->dev->zone_nr_blocks);
                dmz_lock_map(zmd);
                dmz_unmap_zone(zmd, bzone);
                dmz_unlock_zone_reclaim(dzone);
                dmz_free_zone(zmd, bzone);
                dmz_unlock_map(zmd);
        }

        dmz_unlock_flush(zmd);

        return ret;
}

/*
 * Merge valid blocks of dzone into its buffer zone and free dzone.
 */
static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
{
        unsigned int chunk = dzone->chunk;
        struct dm_zone *bzone = dzone->bzone;
        struct dmz_metadata *zmd = zrc->metadata;
        int ret = 0;

        dmz_dev_debug(zrc->dev,
                      "Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)",
                      chunk, dmz_id(zmd, dzone), dmz_weight(dzone),
                      dmz_id(zmd, bzone), dmz_weight(bzone));

        /* Flush data zone into the buffer zone */
        ret = dmz_reclaim_copy(zrc, dzone, bzone);
        if (ret < 0)
                return ret;

        dmz_lock_flush(zmd);

        /* Validate copied blocks */
        ret = dmz_merge_valid_blocks(zmd, dzone, bzone, 0);
        if (ret == 0) {
                /*
                 * Free the data zone and remap the chunk to
                 * the buffer zone.
                 */
                dmz_invalidate_blocks(zmd, dzone, 0, zrc->dev->zone_nr_blocks);
                dmz_lock_map(zmd);
                dmz_unmap_zone(zmd, bzone);
                dmz_unmap_zone(zmd, dzone);
                dmz_unlock_zone_reclaim(dzone);
                dmz_free_zone(zmd, dzone);
                dmz_map_zone(zmd, bzone, chunk);
                dmz_unlock_map(zmd);
        }

        dmz_unlock_flush(zmd);

        return ret;
}
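
/*
 * After this step the chunk is mapped to what used to be its buffer
 * zone. That zone is a random zone, so it can in turn be reclaimed
 * later by dmz_reclaim_rnd_data() below; this is the indirection that
 * dmz_do_reclaim() relies on when it merges a data zone "so that the
 * buffer zone itself can be later reclaimed".
 */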

/*
 * Move valid blocks of the random data zone dzone into a free sequential zone.
 * Once blocks are moved, remap the zone chunk to the sequential zone.
 */
static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
{
        unsigned int chunk = dzone->chunk;
        struct dm_zone *szone = NULL;
        struct dmz_metadata *zmd = zrc->metadata;
        int ret;

        /* Get a free sequential zone */
        dmz_lock_map(zmd);
        szone = dmz_alloc_zone(zmd, DMZ_ALLOC_RECLAIM);
        dmz_unlock_map(zmd);
        if (!szone)
                return -ENOSPC;

        dmz_dev_debug(zrc->dev,
                      "Chunk %u, move rnd zone %u (weight %u) to seq zone %u",
                      chunk, dmz_id(zmd, dzone), dmz_weight(dzone),
                      dmz_id(zmd, szone));

        /* Flush the random data zone into the sequential zone */
        ret = dmz_reclaim_copy(zrc, dzone, szone);

        dmz_lock_flush(zmd);

        if (ret == 0) {
                /* Validate copied blocks */
                ret = dmz_copy_valid_blocks(zmd, dzone, szone);
        }
        if (ret) {
                /* Free the sequential zone */
                dmz_lock_map(zmd);
                dmz_free_zone(zmd, szone);
                dmz_unlock_map(zmd);
        } else {
                /* Free the data zone and remap the chunk */
                dmz_invalidate_blocks(zmd, dzone, 0, zrc->dev->zone_nr_blocks);
                dmz_lock_map(zmd);
                dmz_unmap_zone(zmd, dzone);
                dmz_unlock_zone_reclaim(dzone);
                dmz_free_zone(zmd, dzone);
                dmz_map_zone(zmd, szone, chunk);
                dmz_unlock_map(zmd);
        }

        dmz_unlock_flush(zmd);

        return ret;
}
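
/*
 * Failure handling note: if the copy or the block validation fails,
 * only the newly allocated sequential zone is released and the chunk
 * keeps its existing mapping to dzone. The mapping is switched to
 * szone under the map lock only once the copied blocks are known to
 * be valid, so a chunk is never left without a usable zone mapping.
 */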

/*
 * Reclaim an empty zone.
 */
static void dmz_reclaim_empty(struct dmz_reclaim *zrc, struct dm_zone *dzone)
{
        struct dmz_metadata *zmd = zrc->metadata;

        dmz_lock_flush(zmd);
        dmz_lock_map(zmd);
        dmz_unmap_zone(zmd, dzone);
        dmz_unlock_zone_reclaim(dzone);
        dmz_free_zone(zmd, dzone);
        dmz_unlock_map(zmd);
        dmz_unlock_flush(zmd);
}

/*
 * Find a candidate zone for reclaim and process it.
 */
static int dmz_do_reclaim(struct dmz_reclaim *zrc)
{
        struct dmz_metadata *zmd = zrc->metadata;
        struct dm_zone *dzone;
        struct dm_zone *rzone;
        unsigned long start;
        int ret;

        /* Get a data zone */
        dzone = dmz_get_zone_for_reclaim(zmd);
        if (IS_ERR(dzone))
                return PTR_ERR(dzone);

        start = jiffies;

        if (dmz_is_rnd(dzone)) {
                if (!dmz_weight(dzone)) {
                        /* Empty zone */
                        dmz_reclaim_empty(zrc, dzone);
                        ret = 0;
                } else {
                        /*
                         * Reclaim the random data zone by moving its
                         * valid data blocks to a free sequential zone.
                         */
                        ret = dmz_reclaim_rnd_data(zrc, dzone);
                }
                rzone = dzone;

        } else {
                struct dm_zone *bzone = dzone->bzone;
                sector_t chunk_block = 0;

                ret = dmz_first_valid_block(zmd, bzone, &chunk_block);
                if (ret < 0)
                        goto out;

                if (ret == 0 || chunk_block >= dzone->wp_block) {
                        /*
                         * The buffer zone is empty or its valid blocks are
                         * after the data zone write pointer.
                         */
                        ret = dmz_reclaim_buf(zrc, dzone);
                        rzone = bzone;
                } else {
                        /*
                         * Reclaim the data zone by merging it into the
                         * buffer zone so that the buffer zone itself can
                         * be later reclaimed.
                         */
                        ret = dmz_reclaim_seq_data(zrc, dzone);
                        rzone = dzone;
                }
        }
out:
        if (ret) {
                dmz_unlock_zone_reclaim(dzone);
                return ret;
        }

        ret = dmz_flush_metadata(zrc->metadata);
        if (ret) {
                dmz_dev_debug(zrc->dev,
                              "Metadata flush for zone %u failed, err %d",
                              dmz_id(zmd, rzone), ret);
                return ret;
        }

        dmz_dev_debug(zrc->dev, "Reclaimed zone %u in %u ms",
                      dmz_id(zmd, rzone), jiffies_to_msecs(jiffies - start));
        return 0;
}

/*
 * Test if the target device is idle.
 */
static inline int dmz_target_idle(struct dmz_reclaim *zrc)
{
        return time_is_before_jiffies(zrc->atime + DMZ_IDLE_PERIOD);
}
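
/*
 * time_is_before_jiffies(x) is true once x is in the past, so the
 * target counts as idle when no BIO has been accounted through
 * dmz_reclaim_bio_acc() for DMZ_IDLE_PERIOD (10 seconds). For example,
 * with HZ == 250 the test becomes true once more than 2500 ticks have
 * elapsed since the last recorded access.
 */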

/*
 * Test if reclaim is necessary.
 */
static bool dmz_should_reclaim(struct dmz_reclaim *zrc)
{
        struct dmz_metadata *zmd = zrc->metadata;
        unsigned int nr_rnd = dmz_nr_rnd_zones(zmd);
        unsigned int nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd);
        unsigned int p_unmap_rnd = nr_unmap_rnd * 100 / nr_rnd;

        /* Reclaim when idle */
        if (dmz_target_idle(zrc) && nr_unmap_rnd < nr_rnd)
                return true;

        /* If there are still plenty of random zones, do not reclaim */
        if (p_unmap_rnd >= DMZ_RECLAIM_HIGH_UNMAP_RND)
                return false;

        /*
         * If the percentage of unmapped random zones is low,
         * reclaim even if the target is busy.
         */
        return p_unmap_rnd <= DMZ_RECLAIM_LOW_UNMAP_RND;
}
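
/*
 * Worked example of the thresholds: with 100 random zones of which 25
 * are unmapped, p_unmap_rnd = 25; since 25 < DMZ_RECLAIM_HIGH_UNMAP_RND
 * (50) the early return is skipped, and since 25 <= 30 reclaim runs
 * even if the target is busy. With 40 unmapped zones (p_unmap_rnd =
 * 40), a busy target is left alone and reclaim proceeds only when
 * idle.
 */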

/*
 * Reclaim work function.
 */
static void dmz_reclaim_work(struct work_struct *work)
{
        struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work);
        struct dmz_metadata *zmd = zrc->metadata;
        unsigned int nr_rnd, nr_unmap_rnd;
        unsigned int p_unmap_rnd;
        int ret;

        if (dmz_bdev_is_dying(zrc->dev))
                return;

        if (!dmz_should_reclaim(zrc)) {
                mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
                return;
        }

        /*
         * We need to start reclaiming random zones: set up zone copy
         * throttling so that reclaim runs at full speed when random
         * zones are nearly exhausted, and more slowly when free random
         * zones remain, to limit the impact on the user workload.
         */
        nr_rnd = dmz_nr_rnd_zones(zmd);
        nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd);
        p_unmap_rnd = nr_unmap_rnd * 100 / nr_rnd;
        if (dmz_target_idle(zrc) || p_unmap_rnd < DMZ_RECLAIM_LOW_UNMAP_RND / 2) {
                /* Idle or very low percentage: go fast */
                zrc->kc_throttle.throttle = 100;
        } else {
                /* Busy but we still have some random zones: throttle */
                zrc->kc_throttle.throttle = min(75U, 100U - p_unmap_rnd / 2);
        }

        dmz_dev_debug(zrc->dev,
                      "Reclaim (%u): %s, %u%% free rnd zones (%u/%u)",
                      zrc->kc_throttle.throttle,
                      (dmz_target_idle(zrc) ? "Idle" : "Busy"),
                      p_unmap_rnd, nr_unmap_rnd, nr_rnd);

        ret = dmz_do_reclaim(zrc);
        if (ret) {
                dmz_dev_debug(zrc->dev, "Reclaim error %d", ret);
                if (!dmz_check_bdev(zrc->dev))
                        return;
        }

        dmz_schedule_reclaim(zrc);
}
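
/*
 * Throttle arithmetic, worked through: the 100% path is taken when the
 * target is idle or p_unmap_rnd < 15. On the busy path p_unmap_rnd can
 * only be in the 15..30 range (dmz_should_reclaim() rejects a busy
 * target above 30), so 100 - p_unmap_rnd / 2 falls between 85 and 93
 * and min() always clamps the throttle to 75%.
 */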

/*
 * Initialize reclaim.
 */
int dmz_ctr_reclaim(struct dmz_dev *dev, struct dmz_metadata *zmd,
                    struct dmz_reclaim **reclaim)
{
        struct dmz_reclaim *zrc;
        int ret;

        zrc = kzalloc(sizeof(struct dmz_reclaim), GFP_KERNEL);
        if (!zrc)
                return -ENOMEM;

        zrc->dev = dev;
        zrc->metadata = zmd;
        zrc->atime = jiffies;

        /* Reclaim kcopyd client */
        zrc->kc = dm_kcopyd_client_create(&zrc->kc_throttle);
        if (IS_ERR(zrc->kc)) {
                ret = PTR_ERR(zrc->kc);
                zrc->kc = NULL;
                goto err;
        }

        /* Reclaim work */
        INIT_DELAYED_WORK(&zrc->work, dmz_reclaim_work);
        zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s", WQ_MEM_RECLAIM,
                                          dev->name);
        if (!zrc->wq) {
                ret = -ENOMEM;
                goto err;
        }

        *reclaim = zrc;
        queue_delayed_work(zrc->wq, &zrc->work, 0);

        return 0;
err:
        if (zrc->kc)
                dm_kcopyd_client_destroy(zrc->kc);
        kfree(zrc);

        return ret;
}
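
/*
 * Usage sketch (see dm-zoned-target.c for the real call site): the
 * target constructor creates the metadata first, then hands it to
 * reclaim, roughly:
 *
 *      ret = dmz_ctr_reclaim(dev, dmz->metadata, &dmz->reclaim);
 *      if (ret)
 *              goto err_cleanup;
 *
 * where "dmz" and "err_cleanup" stand in for the target's own private
 * data and error path.
 */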

/*
 * Terminate reclaim.
 */
void dmz_dtr_reclaim(struct dmz_reclaim *zrc)
{
        cancel_delayed_work_sync(&zrc->work);
        destroy_workqueue(zrc->wq);
        dm_kcopyd_client_destroy(zrc->kc);
        kfree(zrc);
}

/*
 * Suspend reclaim.
 */
void dmz_suspend_reclaim(struct dmz_reclaim *zrc)
{
        cancel_delayed_work_sync(&zrc->work);
}

/*
 * Resume reclaim.
 */
void dmz_resume_reclaim(struct dmz_reclaim *zrc)
{
        queue_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
}

/*
 * BIO accounting.
 */
void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc)
{
        zrc->atime = jiffies;
}

/*
 * Start reclaim if necessary.
 */
void dmz_schedule_reclaim(struct dmz_reclaim *zrc)
{
        if (dmz_should_reclaim(zrc))
                mod_delayed_work(zrc->wq, &zrc->work, 0);
}
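
/*
 * Usage sketch for the two hooks above (assuming the usual dm-zoned
 * target flow): the BIO handling path calls dmz_reclaim_bio_acc() for
 * every incoming BIO to refresh the idle timestamp, and calls
 * dmz_schedule_reclaim() when free zones may become scarce, e.g.
 * before a write that can trigger a zone allocation:
 *
 *      dmz_reclaim_bio_acc(dmz->reclaim);      // in the map path
 *      ...
 *      dmz_schedule_reclaim(dmz->reclaim);     // before zone allocation
 */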
