root/block/badblocks.c


DEFINITIONS

This source file includes the following definitions:
  1. badblocks_check
  2. badblocks_update_acked
  3. badblocks_set
  4. badblocks_clear
  5. ack_all_badblocks
  6. badblocks_show
  7. badblocks_store
  8. __badblocks_init
  9. badblocks_init
  10. devm_init_badblocks
  11. badblocks_exit

// SPDX-License-Identifier: GPL-2.0
/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

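/*
 * Illustrative sketch of the on-page entry format used throughout this
 * file, in terms of the BB_* helpers from <linux/badblocks.h> (the
 * header is authoritative; this is only a worked example):
 *
 *      bit 63     : 'acknowledged' flag
 *      bits 62..9 : start of the bad range, in sectors (or shifted blocks)
 *      bits  8..0 : length - 1, so 0..511 encodes lengths 1..512
 *
 *      u64 e = BB_MAKE(1024, 8, 1);    // sectors 1024..1031, acked
 *      BB_OFFSET(e) == 1024;
 *      BB_LEN(e) == 8;
 *      BB_ACK(e) == 1;
 *
 * The table is a single page, so it holds at most MAX_BADBLOCKS
 * (PAGE_SIZE / 8) such entries.
 */
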
/**
 * badblocks_check() - check a given range for bad sectors
 * @bb:         the badblocks structure that holds all badblock information
 * @s:          sector (start) at which to check for badblocks
 * @sectors:    number of sectors to check for badblocks
 * @first_bad:  pointer to store location of the first badblock
 * @bad_sectors: pointer to store the number of bad sectors in the range
 *              starting at @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64bits wide.  This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 *  A 'shift' can be set so that larger blocks are tracked and
 *  consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad.  So we binary-search
 * to the last range that starts at-or-before the given endpoint,
 * (or "before the sector after the target range")
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
                        sector_t *first_bad, int *bad_sectors)
{
        int hi;
        int lo;
        u64 *p = bb->page;
        int rv;
        sector_t target = s + sectors;
        unsigned seq;

        if (bb->shift > 0) {
                /* round the start down, and the end up */
                s >>= bb->shift;
                target += (1<<bb->shift) - 1;
                target >>= bb->shift;
                sectors = target - s;
        }
        /* 'target' is now the first block after the bad range */
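        /*
         * Worked example of the rounding above (illustrative only): with
         * bb->shift == 3 (8-sector blocks), s = 10 and sectors = 4 become
         * s = 10 >> 3 = 1 and target = (14 + 7) >> 3 = 2, i.e. the single
         * block covering sectors 8..15, a superset of the queried range.
         */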

retry:
        seq = read_seqbegin(&bb->lock);
        lo = 0;
        rv = 0;
        hi = bb->count;

        /* Binary search between lo and hi for 'target'
         * i.e. for the last range that starts before 'target'
         */
        /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
         * are known not to be the last range before target.
         * VARIANT: hi-lo is the number of possible
         * ranges, and decreases until it reaches 1
         */
        while (hi - lo > 1) {
                int mid = (lo + hi) / 2;
                sector_t a = BB_OFFSET(p[mid]);

                if (a < target)
                        /* This could still be the one, earlier ranges
                         * could not.
                         */
                        lo = mid;
                else
                        /* This and later ranges are definitely out. */
                        hi = mid;
        }
        /* 'lo' might be the last that started before target, but 'hi' isn't */
        if (hi > lo) {
                /* need to check all ranges that end after 's' to see if
                 * any are unacknowledged.
                 */
                while (lo >= 0 &&
                       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
                        if (BB_OFFSET(p[lo]) < target) {
                                /* starts before the end, and finishes after
                                 * the start, so they must overlap
                                 */
                                if (rv != -1 && BB_ACK(p[lo]))
                                        rv = 1;
                                else
                                        rv = -1;
                                *first_bad = BB_OFFSET(p[lo]);
                                *bad_sectors = BB_LEN(p[lo]);
                        }
                        lo--;
                }
        }

        if (read_seqretry(&bb->lock, seq))
                goto retry;

        return rv;
}
EXPORT_SYMBOL_GPL(badblocks_check);
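
/*
 * Hedged usage sketch (not taken from this file): a submission path that
 * consults the table before issuing I/O might look like the following;
 * the variable names are hypothetical.
 *
 *      sector_t first_bad;
 *      int bad_sectors;
 *
 *      switch (badblocks_check(bb, sector, nr_sectors,
 *                              &first_bad, &bad_sectors)) {
 *      case 0:         // range is clean, proceed normally
 *              break;
 *      case 1:         // bad but acknowledged: fail or remap right away
 *              break;
 *      case -1:        // unacknowledged: wait for metadata to be written
 *              break;
 *      }
 */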
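/*
 * badblocks_update_acked() - recompute bb->unacked_exist
 *
 * Scan the table and clear the unacked_exist flag once every entry has
 * been acknowledged.  Callers hold bb->lock for writing.
 */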
static void badblocks_update_acked(struct badblocks *bb)
{
        u64 *p = bb->page;
        int i;
        bool unacked = false;

        if (!bb->unacked_exist)
                return;

        for (i = 0; i < bb->count ; i++) {
                if (!BB_ACK(p[i])) {
                        unacked = true;
                        break;
                }
        }

        if (!unacked)
                bb->unacked_exist = 0;
}

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb:         the badblocks structure that holds all badblock information
 * @s:          first sector to mark as bad
 * @sectors:    number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged. We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
                        int acknowledged)
{
        u64 *p;
        int lo, hi;
        int rv = 0;
        unsigned long flags;

        if (bb->shift < 0)
                /* badblocks are disabled */
                return 1;

        if (bb->shift) {
                /* round the start down, and the end up */
                sector_t next = s + sectors;

                s >>= bb->shift;
                next += (1<<bb->shift) - 1;
                next >>= bb->shift;
                sectors = next - s;
        }

        write_seqlock_irqsave(&bb->lock, flags);

        p = bb->page;
        lo = 0;
        hi = bb->count;
        /* Find the last range that starts at-or-before 's' */
        while (hi - lo > 1) {
                int mid = (lo + hi) / 2;
                sector_t a = BB_OFFSET(p[mid]);

                if (a <= s)
                        lo = mid;
                else
                        hi = mid;
        }
        if (hi > lo && BB_OFFSET(p[lo]) > s)
                hi = lo;

        if (hi > lo) {
                /* we found a range that might merge with the start
                 * of our new range
                 */
                sector_t a = BB_OFFSET(p[lo]);
                sector_t e = a + BB_LEN(p[lo]);
                int ack = BB_ACK(p[lo]);

                if (e >= s) {
                        /* Yes, we can merge with a previous range */
                        if (s == a && s + sectors >= e)
                                /* new range covers old */
                                ack = acknowledged;
                        else
                                ack = ack && acknowledged;

                        if (e < s + sectors)
                                e = s + sectors;
                        if (e - a <= BB_MAX_LEN) {
                                p[lo] = BB_MAKE(a, e-a, ack);
                                s = e;
                        } else {
                                /* does not all fit in one range,
                                 * make p[lo] maximal
                                 */
                                if (BB_LEN(p[lo]) != BB_MAX_LEN)
                                        p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
                                s = a + BB_MAX_LEN;
                        }
                        sectors = e - s;
                }
        }
        if (sectors && hi < bb->count) {
                /* 'hi' points to the first range that starts after 's'.
                 * Maybe we can merge with the start of that range
                 */
                sector_t a = BB_OFFSET(p[hi]);
                sector_t e = a + BB_LEN(p[hi]);
                int ack = BB_ACK(p[hi]);

                if (a <= s + sectors) {
                        /* merging is possible */
                        if (e <= s + sectors) {
                                /* full overlap */
                                e = s + sectors;
                                ack = acknowledged;
                        } else
                                ack = ack && acknowledged;

                        a = s;
                        if (e - a <= BB_MAX_LEN) {
                                p[hi] = BB_MAKE(a, e-a, ack);
                                s = e;
                        } else {
                                p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
                                s = a + BB_MAX_LEN;
                        }
                        sectors = e - s;
                        lo = hi;
                        hi++;
                }
        }
        if (sectors == 0 && hi < bb->count) {
                /* we might be able to combine lo and hi */
                /* Note: 's' is at the end of 'lo' */
                sector_t a = BB_OFFSET(p[hi]);
                int lolen = BB_LEN(p[lo]);
                int hilen = BB_LEN(p[hi]);
                int newlen = lolen + hilen - (s - a);

                if (s >= a && newlen < BB_MAX_LEN) {
                        /* yes, we can combine them */
                        int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);

                        p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
                        memmove(p + hi, p + hi + 1,
                                (bb->count - hi - 1) * 8);
                        bb->count--;
                }
        }
        while (sectors) {
                /* didn't merge (it all).
                 * Need to add a range just before 'hi'
                 */
                if (bb->count >= MAX_BADBLOCKS) {
                        /* No room for more */
                        rv = 1;
                        break;
                } else {
                        int this_sectors = sectors;

                        memmove(p + hi + 1, p + hi,
                                (bb->count - hi) * 8);
                        bb->count++;

                        if (this_sectors > BB_MAX_LEN)
                                this_sectors = BB_MAX_LEN;
                        p[hi] = BB_MAKE(s, this_sectors, acknowledged);
                        sectors -= this_sectors;
                        s += this_sectors;
                }
        }

        bb->changed = 1;
        if (!acknowledged)
                bb->unacked_exist = 1;
        else
                badblocks_update_acked(bb);
        write_sequnlock_irqrestore(&bb->lock, flags);

        return rv;
}
EXPORT_SYMBOL_GPL(badblocks_set);
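
/*
 * Worked example of the merge behaviour (illustrative only): with
 * bb->shift == 0 and one existing acknowledged entry covering sectors
 * 100..107, badblocks_set(bb, 104, 8, 1) finds that entry via the binary
 * search, extends it in place, and leaves a single entry covering
 * 100..111 rather than adding a second, overlapping one.
 */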

/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb:         the badblocks structure that holds all badblock information
 * @s:          first sector to clear
 * @sectors:    number of sectors to clear
 *
 * This may involve extending the table if we split a region,
 * but it must not fail.  So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 *  -ENOSPC: failed to clear badblocks (table full while splitting)
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
        u64 *p;
        int lo, hi;
        sector_t target = s + sectors;
        int rv = 0;

        if (bb->shift > 0) {
                /* When clearing we round the start up and the end down.
                 * This should not matter as the shift should align with
                 * the block size and no rounding should ever be needed.
                 * However it is better to think a block is bad when it
                 * isn't than to think a block is not bad when it is.
                 */
                s += (1<<bb->shift) - 1;
                s >>= bb->shift;
                target >>= bb->shift;
                sectors = target - s;
        }

        write_seqlock_irq(&bb->lock);

        p = bb->page;
        lo = 0;
        hi = bb->count;
        /* Find the last range that starts before 'target' */
        while (hi - lo > 1) {
                int mid = (lo + hi) / 2;
                sector_t a = BB_OFFSET(p[mid]);

                if (a < target)
                        lo = mid;
                else
                        hi = mid;
        }
        if (hi > lo) {
                /* p[lo] is the last range that could overlap the
                 * current range.  Earlier ranges could also overlap,
                 * but only this one can overlap the end of the range.
                 */
                if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
                    (BB_OFFSET(p[lo]) < target)) {
                        /* Partial overlap, leave the tail of this range */
                        int ack = BB_ACK(p[lo]);
                        sector_t a = BB_OFFSET(p[lo]);
                        sector_t end = a + BB_LEN(p[lo]);

                        if (a < s) {
                                /* we need to split this range */
                                if (bb->count >= MAX_BADBLOCKS) {
                                        rv = -ENOSPC;
                                        goto out;
                                }
                                memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
                                bb->count++;
                                p[lo] = BB_MAKE(a, s-a, ack);
                                lo++;
                        }
                        p[lo] = BB_MAKE(target, end - target, ack);
                        /* there is no longer an overlap */
                        hi = lo;
                        lo--;
                }
                while (lo >= 0 &&
                       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
                       (BB_OFFSET(p[lo]) < target)) {
                        /* This range does overlap */
                        if (BB_OFFSET(p[lo]) < s) {
                                /* Keep the early parts of this range. */
                                int ack = BB_ACK(p[lo]);
                                sector_t start = BB_OFFSET(p[lo]);

                                p[lo] = BB_MAKE(start, s - start, ack);
                                /* now p[lo] doesn't overlap, so.. */
                                break;
                        }
                        lo--;
                }
                /* 'lo' is strictly before, 'hi' is strictly after,
                 * anything between needs to be discarded
                 */
                if (hi - lo > 1) {
                        memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
                        bb->count -= (hi - lo - 1);
                }
        }

        badblocks_update_acked(bb);
        bb->changed = 1;
out:
        write_sequnlock_irq(&bb->lock);
        return rv;
}
EXPORT_SYMBOL_GPL(badblocks_clear);
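
/*
 * Worked example of the split path (illustrative only): with bb->shift
 * == 0 and one entry covering sectors 100..111, badblocks_clear(bb, 104, 4)
 * keeps the head and the tail and ends up with two entries, 100..103 and
 * 108..111.  Splitting consumes a table slot, which is why this is the
 * one path that can return -ENOSPC.
 */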

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb:         the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear.  It is used by
 * in-kernel metadata updates
 */
void ack_all_badblocks(struct badblocks *bb)
{
        if (bb->page == NULL || bb->changed)
                /* no point even trying */
                return;
        write_seqlock_irq(&bb->lock);

        if (bb->changed == 0 && bb->unacked_exist) {
                u64 *p = bb->page;
                int i;

                for (i = 0; i < bb->count ; i++) {
                        if (!BB_ACK(p[i])) {
                                sector_t start = BB_OFFSET(p[i]);
                                int len = BB_LEN(p[i]);

                                p[i] = BB_MAKE(start, len, 1);
                        }
                }
                bb->unacked_exist = 0;
        }
        write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb:         the badblocks structure that holds all badblock information
 * @page:       buffer received from sysfs
 * @unack:      whether to show unacknowledged badblocks
 *
 * Return:
 *  Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
        size_t len;
        int i;
        u64 *p = bb->page;
        unsigned seq;

        if (bb->shift < 0)
                return 0;

retry:
        seq = read_seqbegin(&bb->lock);

        len = 0;
        i = 0;

        while (len < PAGE_SIZE && i < bb->count) {
                sector_t s = BB_OFFSET(p[i]);
                unsigned int length = BB_LEN(p[i]);
                int ack = BB_ACK(p[i]);

                i++;

                if (unack && ack)
                        continue;

                len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
                                (unsigned long long)s << bb->shift,
                                length << bb->shift);
        }
        if (unack && len == 0)
                bb->unacked_exist = 0;

        if (read_seqretry(&bb->lock, seq))
                goto retry;

        return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);
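
/*
 * Illustrative output (not from a real device): each line is
 * "<first sector> <number of sectors>", already shifted back into
 * sector units, e.g.:
 *
 *      520 8
 *      1032 4
 */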

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb:         the badblocks structure that holds all badblock information
 * @page:       buffer received from sysfs
 * @len:        length of data received from sysfs
 * @unack:      whether to store the badblocks as unacknowledged
 *
 * Return:
 *  Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
                        int unack)
{
        unsigned long long sector;
        int length;
        char newline;

        switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
        case 3:
                if (newline != '\n')
                        return -EINVAL;
                /* fall through */
        case 2:
                if (length <= 0)
                        return -EINVAL;
                break;
        default:
                return -EINVAL;
        }

        if (badblocks_set(bb, sector, length, !unack))
                return -ENOSPC;
        else
                return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);
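
/*
 * Illustrative input (the sysfs file name and path depend on the driver
 * exposing this helper, so the path below is hypothetical): writing
 * "1024 8" marks sectors 1024..1031 bad, e.g.:
 *
 *      echo "1024 8" > /sys/.../bad_blocks
 */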
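/*
 * __badblocks_init() - common initialization for badblocks_init() and
 * devm_init_badblocks()
 *
 * Allocate the single table page (device-managed when @dev is non-NULL)
 * and initialize the seqlock.  A negative shift marks the structure as
 * disabled, both when @enable is false and when allocation fails.
 */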
static int __badblocks_init(struct device *dev, struct badblocks *bb,
                int enable)
{
        bb->dev = dev;
        bb->count = 0;
        if (enable)
                bb->shift = 0;
        else
                bb->shift = -1;
        if (dev)
                bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
        else
                bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
        if (!bb->page) {
                bb->shift = -1;
                return -ENOMEM;
        }
        seqlock_init(&bb->lock);

        return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb:         the badblocks structure that holds all badblock information
 * @enable:     whether to enable badblocks accounting
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
        return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);
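/**
 * devm_init_badblocks() - device-managed version of badblocks_init()
 * @dev:        device that owns the badblocks structure
 * @bb:         the badblocks structure that holds all badblock information
 *
 * Accounting is always enabled; the table page is freed by devm.
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */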
int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
        if (!bb)
                return -EINVAL;
        return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);

/**
 * badblocks_exit() - free the badblocks structure
 * @bb:         the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
        if (!bb)
                return;
        if (bb->dev)
                devm_kfree(bb->dev, bb->page);
        else
                kfree(bb->page);
        bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);
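
/*
 * Hedged lifecycle sketch (illustrative, not from this file): a driver
 * without a struct device might use the helpers like this; the error
 * handling is deliberately minimal.
 *
 *      struct badblocks bb;
 *
 *      if (badblocks_init(&bb, 1))             // 1 == enable accounting
 *              return -ENOMEM;
 *      badblocks_set(&bb, 1024, 8, 0);         // record, not yet acked
 *      ack_all_badblocks(&bb);                 // later, after metadata I/O
 *      badblocks_exit(&bb);
 *
 * Device-managed users call devm_init_badblocks(dev, &bb) instead and
 * can still call badblocks_exit() early; devm_kfree() handles the page.
 */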
