root/fs/ubifs/gc.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. switch_gc_head
  2. data_nodes_cmp
  3. nondata_nodes_cmp
  4. sort_nodes
  5. move_node
  6. move_nodes
  7. gc_sync_wbufs
  8. ubifs_garbage_collect_leb
  9. ubifs_garbage_collect
  10. ubifs_gc_start_commit
  11. ubifs_gc_end_commit
  12. ubifs_destroy_idx_gc
  13. ubifs_get_idx_gc_leb

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * This file is part of UBIFS.
   4  *
   5  * Copyright (C) 2006-2008 Nokia Corporation.
   6  *
   7  * Authors: Adrian Hunter
   8  *          Artem Bityutskiy (Битюцкий Артём)
   9  */
  10 
  11 /*
  12  * This file implements garbage collection. The procedure for garbage collection
  13  * is different depending on whether a LEB as an index LEB (contains index
  14  * nodes) or not. For non-index LEBs, garbage collection finds a LEB which
  15  * contains a lot of dirty space (obsolete nodes), and copies the non-obsolete
  16  * nodes to the journal, at which point the garbage-collected LEB is free to be
  17  * reused. For index LEBs, garbage collection marks the non-obsolete index nodes
  18  * dirty in the TNC, and after the next commit, the garbage-collected LEB is
  19  * to be reused. Garbage collection will cause the number of dirty index nodes
  20  * to grow, however sufficient space is reserved for the index to ensure the
  21  * commit will never run out of space.
  22  *
  23  * Notes about dead watermark. At current UBIFS implementation we assume that
  24  * LEBs which have less than @c->dead_wm bytes of free + dirty space are full
  25  * and not worth garbage-collecting. The dead watermark is one min. I/O unit
  26  * size, or min. UBIFS node size, depending on what is greater. Indeed, UBIFS
  27  * Garbage Collector has to synchronize the GC head's write buffer before
  28  * returning, so this is about wasting one min. I/O unit. However, UBIFS GC can
  29  * actually reclaim even very small pieces of dirty space by garbage collecting
  30  * enough dirty LEBs, but we do not bother doing this at this implementation.
  31  *
  32  * Notes about dark watermark. The results of GC work depends on how big are
  33  * the UBIFS nodes GC deals with. Large nodes make GC waste more space. Indeed,
  34  * if GC move data from LEB A to LEB B and nodes in LEB A are large, GC would
  35  * have to waste large pieces of free space at the end of LEB B, because nodes
  36  * from LEB A would not fit. And the worst situation is when all nodes are of
  37  * maximum size. So dark watermark is the amount of free + dirty space in LEB
  38  * which are guaranteed to be reclaimable. If LEB has less space, the GC might
  39  * be unable to reclaim it. So, LEBs with free + dirty greater than dark
  40  * watermark are "good" LEBs from GC's point of view. The other LEBs are not so
  41  * good, and GC takes extra care when moving them.
  42  */
  43 
  44 #include <linux/slab.h>
  45 #include <linux/pagemap.h>
  46 #include <linux/list_sort.h>
  47 #include "ubifs.h"
  48 
  49 /*
  50  * GC may need to move more than one LEB to make progress. The below constants
  51  * define "soft" and "hard" limits on the number of LEBs the garbage collector
  52  * may move.
  53  */
  54 #define SOFT_LEBS_LIMIT 4
  55 #define HARD_LEBS_LIMIT 32
  56 
  57 /**
  58  * switch_gc_head - switch the garbage collection journal head.
  59  * @c: UBIFS file-system description object
  60  * @buf: buffer to write
  61  * @len: length of the buffer to write
  62  * @lnum: LEB number written is returned here
  63  * @offs: offset written is returned here
  64  *
  65  * This function switch the GC head to the next LEB which is reserved in
  66  * @c->gc_lnum. Returns %0 in case of success, %-EAGAIN if commit is required,
  67  * and other negative error code in case of failures.
  68  */
  69 static int switch_gc_head(struct ubifs_info *c)
  70 {
  71         int err, gc_lnum = c->gc_lnum;
  72         struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
  73 
  74         ubifs_assert(c, gc_lnum != -1);
  75         dbg_gc("switch GC head from LEB %d:%d to LEB %d (waste %d bytes)",
  76                wbuf->lnum, wbuf->offs + wbuf->used, gc_lnum,
  77                c->leb_size - wbuf->offs - wbuf->used);
  78 
  79         err = ubifs_wbuf_sync_nolock(wbuf);
  80         if (err)
  81                 return err;
  82 
  83         /*
  84          * The GC write-buffer was synchronized, we may safely unmap
  85          * 'c->gc_lnum'.
  86          */
  87         err = ubifs_leb_unmap(c, gc_lnum);
  88         if (err)
  89                 return err;
  90 
  91         err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
  92         if (err)
  93                 return err;
  94 
  95         c->gc_lnum = -1;
  96         err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0);
  97         return err;
  98 }
  99 
 100 /**
 101  * data_nodes_cmp - compare 2 data nodes.
 102  * @priv: UBIFS file-system description object
 103  * @a: first data node
 104  * @b: second data node
 105  *
 106  * This function compares data nodes @a and @b. Returns %1 if @a has greater
 107  * inode or block number, and %-1 otherwise.
 108  */
 109 static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
 110 {
 111         ino_t inuma, inumb;
 112         struct ubifs_info *c = priv;
 113         struct ubifs_scan_node *sa, *sb;
 114 
 115         cond_resched();
 116         if (a == b)
 117                 return 0;
 118 
 119         sa = list_entry(a, struct ubifs_scan_node, list);
 120         sb = list_entry(b, struct ubifs_scan_node, list);
 121 
 122         ubifs_assert(c, key_type(c, &sa->key) == UBIFS_DATA_KEY);
 123         ubifs_assert(c, key_type(c, &sb->key) == UBIFS_DATA_KEY);
 124         ubifs_assert(c, sa->type == UBIFS_DATA_NODE);
 125         ubifs_assert(c, sb->type == UBIFS_DATA_NODE);
 126 
 127         inuma = key_inum(c, &sa->key);
 128         inumb = key_inum(c, &sb->key);
 129 
 130         if (inuma == inumb) {
 131                 unsigned int blka = key_block(c, &sa->key);
 132                 unsigned int blkb = key_block(c, &sb->key);
 133 
 134                 if (blka <= blkb)
 135                         return -1;
 136         } else if (inuma <= inumb)
 137                 return -1;
 138 
 139         return 1;
 140 }
 141 
 142 /*
 143  * nondata_nodes_cmp - compare 2 non-data nodes.
 144  * @priv: UBIFS file-system description object
 145  * @a: first node
 146  * @a: second node
 147  *
 148  * This function compares nodes @a and @b. It makes sure that inode nodes go
 149  * first and sorted by length in descending order. Directory entry nodes go
 150  * after inode nodes and are sorted in ascending hash valuer order.
 151  */
 152 static int nondata_nodes_cmp(void *priv, struct list_head *a,
 153                              struct list_head *b)
 154 {
 155         ino_t inuma, inumb;
 156         struct ubifs_info *c = priv;
 157         struct ubifs_scan_node *sa, *sb;
 158 
 159         cond_resched();
 160         if (a == b)
 161                 return 0;
 162 
 163         sa = list_entry(a, struct ubifs_scan_node, list);
 164         sb = list_entry(b, struct ubifs_scan_node, list);
 165 
 166         ubifs_assert(c, key_type(c, &sa->key) != UBIFS_DATA_KEY &&
 167                      key_type(c, &sb->key) != UBIFS_DATA_KEY);
 168         ubifs_assert(c, sa->type != UBIFS_DATA_NODE &&
 169                      sb->type != UBIFS_DATA_NODE);
 170 
 171         /* Inodes go before directory entries */
 172         if (sa->type == UBIFS_INO_NODE) {
 173                 if (sb->type == UBIFS_INO_NODE)
 174                         return sb->len - sa->len;
 175                 return -1;
 176         }
 177         if (sb->type == UBIFS_INO_NODE)
 178                 return 1;
 179 
 180         ubifs_assert(c, key_type(c, &sa->key) == UBIFS_DENT_KEY ||
 181                      key_type(c, &sa->key) == UBIFS_XENT_KEY);
 182         ubifs_assert(c, key_type(c, &sb->key) == UBIFS_DENT_KEY ||
 183                      key_type(c, &sb->key) == UBIFS_XENT_KEY);
 184         ubifs_assert(c, sa->type == UBIFS_DENT_NODE ||
 185                      sa->type == UBIFS_XENT_NODE);
 186         ubifs_assert(c, sb->type == UBIFS_DENT_NODE ||
 187                      sb->type == UBIFS_XENT_NODE);
 188 
 189         inuma = key_inum(c, &sa->key);
 190         inumb = key_inum(c, &sb->key);
 191 
 192         if (inuma == inumb) {
 193                 uint32_t hasha = key_hash(c, &sa->key);
 194                 uint32_t hashb = key_hash(c, &sb->key);
 195 
 196                 if (hasha <= hashb)
 197                         return -1;
 198         } else if (inuma <= inumb)
 199                 return -1;
 200 
 201         return 1;
 202 }
 203 
 204 /**
 205  * sort_nodes - sort nodes for GC.
 206  * @c: UBIFS file-system description object
 207  * @sleb: describes nodes to sort and contains the result on exit
 208  * @nondata: contains non-data nodes on exit
 209  * @min: minimum node size is returned here
 210  *
 211  * This function sorts the list of inodes to garbage collect. First of all, it
 212  * kills obsolete nodes and separates data and non-data nodes to the
 213  * @sleb->nodes and @nondata lists correspondingly.
 214  *
 215  * Data nodes are then sorted in block number order - this is important for
 216  * bulk-read; data nodes with lower inode number go before data nodes with
 217  * higher inode number, and data nodes with lower block number go before data
 218  * nodes with higher block number;
 219  *
 220  * Non-data nodes are sorted as follows.
 221  *   o First go inode nodes - they are sorted in descending length order.
 222  *   o Then go directory entry nodes - they are sorted in hash order, which
 223  *     should supposedly optimize 'readdir()'. Direntry nodes with lower parent
 224  *     inode number go before direntry nodes with higher parent inode number,
 225  *     and direntry nodes with lower name hash values go before direntry nodes
 226  *     with higher name hash values.
 227  *
 228  * This function returns zero in case of success and a negative error code in
 229  * case of failure.
 230  */
 231 static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 232                       struct list_head *nondata, int *min)
 233 {
 234         int err;
 235         struct ubifs_scan_node *snod, *tmp;
 236 
 237         *min = INT_MAX;
 238 
 239         /* Separate data nodes and non-data nodes */
 240         list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
 241                 ubifs_assert(c, snod->type == UBIFS_INO_NODE  ||
 242                              snod->type == UBIFS_DATA_NODE ||
 243                              snod->type == UBIFS_DENT_NODE ||
 244                              snod->type == UBIFS_XENT_NODE ||
 245                              snod->type == UBIFS_TRUN_NODE ||
 246                              snod->type == UBIFS_AUTH_NODE);
 247 
 248                 if (snod->type != UBIFS_INO_NODE  &&
 249                     snod->type != UBIFS_DATA_NODE &&
 250                     snod->type != UBIFS_DENT_NODE &&
 251                     snod->type != UBIFS_XENT_NODE) {
 252                         /* Probably truncation node, zap it */
 253                         list_del(&snod->list);
 254                         kfree(snod);
 255                         continue;
 256                 }
 257 
 258                 ubifs_assert(c, key_type(c, &snod->key) == UBIFS_DATA_KEY ||
 259                              key_type(c, &snod->key) == UBIFS_INO_KEY  ||
 260                              key_type(c, &snod->key) == UBIFS_DENT_KEY ||
 261                              key_type(c, &snod->key) == UBIFS_XENT_KEY);
 262 
 263                 err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
 264                                          snod->offs, 0);
 265                 if (err < 0)
 266                         return err;
 267 
 268                 if (!err) {
 269                         /* The node is obsolete, remove it from the list */
 270                         list_del(&snod->list);
 271                         kfree(snod);
 272                         continue;
 273                 }
 274 
 275                 if (snod->len < *min)
 276                         *min = snod->len;
 277 
 278                 if (key_type(c, &snod->key) != UBIFS_DATA_KEY)
 279                         list_move_tail(&snod->list, nondata);
 280         }
 281 
 282         /* Sort data and non-data nodes */
 283         list_sort(c, &sleb->nodes, &data_nodes_cmp);
 284         list_sort(c, nondata, &nondata_nodes_cmp);
 285 
 286         err = dbg_check_data_nodes_order(c, &sleb->nodes);
 287         if (err)
 288                 return err;
 289         err = dbg_check_nondata_nodes_order(c, nondata);
 290         if (err)
 291                 return err;
 292         return 0;
 293 }
 294 
 295 /**
 296  * move_node - move a node.
 297  * @c: UBIFS file-system description object
 298  * @sleb: describes the LEB to move nodes from
 299  * @snod: the mode to move
 300  * @wbuf: write-buffer to move node to
 301  *
 302  * This function moves node @snod to @wbuf, changes TNC correspondingly, and
 303  * destroys @snod. Returns zero in case of success and a negative error code in
 304  * case of failure.
 305  */
 306 static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 307                      struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf)
 308 {
 309         int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used;
 310 
 311         cond_resched();
 312         err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len);
 313         if (err)
 314                 return err;
 315 
 316         err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
 317                                 snod->offs, new_lnum, new_offs,
 318                                 snod->len);
 319         list_del(&snod->list);
 320         kfree(snod);
 321         return err;
 322 }
 323 
 324 /**
 325  * move_nodes - move nodes.
 326  * @c: UBIFS file-system description object
 327  * @sleb: describes the LEB to move nodes from
 328  *
 329  * This function moves valid nodes from data LEB described by @sleb to the GC
 330  * journal head. This function returns zero in case of success, %-EAGAIN if
 331  * commit is required, and other negative error codes in case of other
 332  * failures.
 333  */
 334 static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
 335 {
 336         int err, min;
 337         LIST_HEAD(nondata);
 338         struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
 339 
 340         if (wbuf->lnum == -1) {
 341                 /*
 342                  * The GC journal head is not set, because it is the first GC
 343                  * invocation since mount.
 344                  */
 345                 err = switch_gc_head(c);
 346                 if (err)
 347                         return err;
 348         }
 349 
 350         err = sort_nodes(c, sleb, &nondata, &min);
 351         if (err)
 352                 goto out;
 353 
 354         /* Write nodes to their new location. Use the first-fit strategy */
 355         while (1) {
 356                 int avail, moved = 0;
 357                 struct ubifs_scan_node *snod, *tmp;
 358 
 359                 /* Move data nodes */
 360                 list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
 361                         avail = c->leb_size - wbuf->offs - wbuf->used -
 362                                         ubifs_auth_node_sz(c);
 363                         if  (snod->len > avail)
 364                                 /*
 365                                  * Do not skip data nodes in order to optimize
 366                                  * bulk-read.
 367                                  */
 368                                 break;
 369 
 370                         err = ubifs_shash_update(c, c->jheads[GCHD].log_hash,
 371                                                  snod->node, snod->len);
 372                         if (err)
 373                                 goto out;
 374 
 375                         err = move_node(c, sleb, snod, wbuf);
 376                         if (err)
 377                                 goto out;
 378                         moved = 1;
 379                 }
 380 
 381                 /* Move non-data nodes */
 382                 list_for_each_entry_safe(snod, tmp, &nondata, list) {
 383                         avail = c->leb_size - wbuf->offs - wbuf->used -
 384                                         ubifs_auth_node_sz(c);
 385                         if (avail < min)
 386                                 break;
 387 
 388                         if  (snod->len > avail) {
 389                                 /*
 390                                  * Keep going only if this is an inode with
 391                                  * some data. Otherwise stop and switch the GC
 392                                  * head. IOW, we assume that data-less inode
 393                                  * nodes and direntry nodes are roughly of the
 394                                  * same size.
 395                                  */
 396                                 if (key_type(c, &snod->key) == UBIFS_DENT_KEY ||
 397                                     snod->len == UBIFS_INO_NODE_SZ)
 398                                         break;
 399                                 continue;
 400                         }
 401 
 402                         err = ubifs_shash_update(c, c->jheads[GCHD].log_hash,
 403                                                  snod->node, snod->len);
 404                         if (err)
 405                                 goto out;
 406 
 407                         err = move_node(c, sleb, snod, wbuf);
 408                         if (err)
 409                                 goto out;
 410                         moved = 1;
 411                 }
 412 
 413                 if (ubifs_authenticated(c) && moved) {
 414                         struct ubifs_auth_node *auth;
 415 
 416                         auth = kmalloc(ubifs_auth_node_sz(c), GFP_NOFS);
 417                         if (!auth) {
 418                                 err = -ENOMEM;
 419                                 goto out;
 420                         }
 421 
 422                         err = ubifs_prepare_auth_node(c, auth,
 423                                                 c->jheads[GCHD].log_hash);
 424                         if (err) {
 425                                 kfree(auth);
 426                                 goto out;
 427                         }
 428 
 429                         err = ubifs_wbuf_write_nolock(wbuf, auth,
 430                                                       ubifs_auth_node_sz(c));
 431                         if (err) {
 432                                 kfree(auth);
 433                                 goto out;
 434                         }
 435 
 436                         ubifs_add_dirt(c, wbuf->lnum, ubifs_auth_node_sz(c));
 437                 }
 438 
 439                 if (list_empty(&sleb->nodes) && list_empty(&nondata))
 440                         break;
 441 
 442                 /*
 443                  * Waste the rest of the space in the LEB and switch to the
 444                  * next LEB.
 445                  */
 446                 err = switch_gc_head(c);
 447                 if (err)
 448                         goto out;
 449         }
 450 
 451         return 0;
 452 
 453 out:
 454         list_splice_tail(&nondata, &sleb->nodes);
 455         return err;
 456 }
 457 
 458 /**
 459  * gc_sync_wbufs - sync write-buffers for GC.
 460  * @c: UBIFS file-system description object
 461  *
 462  * We must guarantee that obsoleting nodes are on flash. Unfortunately they may
 463  * be in a write-buffer instead. That is, a node could be written to a
 464  * write-buffer, obsoleting another node in a LEB that is GC'd. If that LEB is
 465  * erased before the write-buffer is sync'd and then there is an unclean
 466  * unmount, then an existing node is lost. To avoid this, we sync all
 467  * write-buffers.
 468  *
 469  * This function returns %0 on success or a negative error code on failure.
 470  */
 471 static int gc_sync_wbufs(struct ubifs_info *c)
 472 {
 473         int err, i;
 474 
 475         for (i = 0; i < c->jhead_cnt; i++) {
 476                 if (i == GCHD)
 477                         continue;
 478                 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
 479                 if (err)
 480                         return err;
 481         }
 482         return 0;
 483 }
 484 
 485 /**
 486  * ubifs_garbage_collect_leb - garbage-collect a logical eraseblock.
 487  * @c: UBIFS file-system description object
 488  * @lp: describes the LEB to garbage collect
 489  *
 490  * This function garbage-collects an LEB and returns one of the @LEB_FREED,
 491  * @LEB_RETAINED, etc positive codes in case of success, %-EAGAIN if commit is
 492  * required, and other negative error codes in case of failures.
 493  */
 494 int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
 495 {
 496         struct ubifs_scan_leb *sleb;
 497         struct ubifs_scan_node *snod;
 498         struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
 499         int err = 0, lnum = lp->lnum;
 500 
 501         ubifs_assert(c, c->gc_lnum != -1 || wbuf->offs + wbuf->used == 0 ||
 502                      c->need_recovery);
 503         ubifs_assert(c, c->gc_lnum != lnum);
 504         ubifs_assert(c, wbuf->lnum != lnum);
 505 
 506         if (lp->free + lp->dirty == c->leb_size) {
 507                 /* Special case - a free LEB  */
 508                 dbg_gc("LEB %d is free, return it", lp->lnum);
 509                 ubifs_assert(c, !(lp->flags & LPROPS_INDEX));
 510 
 511                 if (lp->free != c->leb_size) {
 512                         /*
 513                          * Write buffers must be sync'd before unmapping
 514                          * freeable LEBs, because one of them may contain data
 515                          * which obsoletes something in 'lp->lnum'.
 516                          */
 517                         err = gc_sync_wbufs(c);
 518                         if (err)
 519                                 return err;
 520                         err = ubifs_change_one_lp(c, lp->lnum, c->leb_size,
 521                                                   0, 0, 0, 0);
 522                         if (err)
 523                                 return err;
 524                 }
 525                 err = ubifs_leb_unmap(c, lp->lnum);
 526                 if (err)
 527                         return err;
 528 
 529                 if (c->gc_lnum == -1) {
 530                         c->gc_lnum = lnum;
 531                         return LEB_RETAINED;
 532                 }
 533 
 534                 return LEB_FREED;
 535         }
 536 
 537         /*
 538          * We scan the entire LEB even though we only really need to scan up to
 539          * (c->leb_size - lp->free).
 540          */
 541         sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
 542         if (IS_ERR(sleb))
 543                 return PTR_ERR(sleb);
 544 
 545         ubifs_assert(c, !list_empty(&sleb->nodes));
 546         snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
 547 
 548         if (snod->type == UBIFS_IDX_NODE) {
 549                 struct ubifs_gced_idx_leb *idx_gc;
 550 
 551                 dbg_gc("indexing LEB %d (free %d, dirty %d)",
 552                        lnum, lp->free, lp->dirty);
 553                 list_for_each_entry(snod, &sleb->nodes, list) {
 554                         struct ubifs_idx_node *idx = snod->node;
 555                         int level = le16_to_cpu(idx->level);
 556 
 557                         ubifs_assert(c, snod->type == UBIFS_IDX_NODE);
 558                         key_read(c, ubifs_idx_key(c, idx), &snod->key);
 559                         err = ubifs_dirty_idx_node(c, &snod->key, level, lnum,
 560                                                    snod->offs);
 561                         if (err)
 562                                 goto out;
 563                 }
 564 
 565                 idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
 566                 if (!idx_gc) {
 567                         err = -ENOMEM;
 568                         goto out;
 569                 }
 570 
 571                 idx_gc->lnum = lnum;
 572                 idx_gc->unmap = 0;
 573                 list_add(&idx_gc->list, &c->idx_gc);
 574 
 575                 /*
 576                  * Don't release the LEB until after the next commit, because
 577                  * it may contain data which is needed for recovery. So
 578                  * although we freed this LEB, it will become usable only after
 579                  * the commit.
 580                  */
 581                 err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0,
 582                                           LPROPS_INDEX, 1);
 583                 if (err)
 584                         goto out;
 585                 err = LEB_FREED_IDX;
 586         } else {
 587                 dbg_gc("data LEB %d (free %d, dirty %d)",
 588                        lnum, lp->free, lp->dirty);
 589 
 590                 err = move_nodes(c, sleb);
 591                 if (err)
 592                         goto out_inc_seq;
 593 
 594                 err = gc_sync_wbufs(c);
 595                 if (err)
 596                         goto out_inc_seq;
 597 
 598                 err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0);
 599                 if (err)
 600                         goto out_inc_seq;
 601 
 602                 /* Allow for races with TNC */
 603                 c->gced_lnum = lnum;
 604                 smp_wmb();
 605                 c->gc_seq += 1;
 606                 smp_wmb();
 607 
 608                 if (c->gc_lnum == -1) {
 609                         c->gc_lnum = lnum;
 610                         err = LEB_RETAINED;
 611                 } else {
 612                         err = ubifs_wbuf_sync_nolock(wbuf);
 613                         if (err)
 614                                 goto out;
 615 
 616                         err = ubifs_leb_unmap(c, lnum);
 617                         if (err)
 618                                 goto out;
 619 
 620                         err = LEB_FREED;
 621                 }
 622         }
 623 
 624 out:
 625         ubifs_scan_destroy(sleb);
 626         return err;
 627 
 628 out_inc_seq:
 629         /* We may have moved at least some nodes so allow for races with TNC */
 630         c->gced_lnum = lnum;
 631         smp_wmb();
 632         c->gc_seq += 1;
 633         smp_wmb();
 634         goto out;
 635 }
 636 
 637 /**
 638  * ubifs_garbage_collect - UBIFS garbage collector.
 639  * @c: UBIFS file-system description object
 640  * @anyway: do GC even if there are free LEBs
 641  *
 642  * This function does out-of-place garbage collection. The return codes are:
 643  *   o positive LEB number if the LEB has been freed and may be used;
 644  *   o %-EAGAIN if the caller has to run commit;
 645  *   o %-ENOSPC if GC failed to make any progress;
 646  *   o other negative error codes in case of other errors.
 647  *
 648  * Garbage collector writes data to the journal when GC'ing data LEBs, and just
 649  * marking indexing nodes dirty when GC'ing indexing LEBs. Thus, at some point
 650  * commit may be required. But commit cannot be run from inside GC, because the
 651  * caller might be holding the commit lock, so %-EAGAIN is returned instead;
 652  * And this error code means that the caller has to run commit, and re-run GC
 653  * if there is still no free space.
 654  *
 655  * There are many reasons why this function may return %-EAGAIN:
 656  * o the log is full and there is no space to write an LEB reference for
 657  *   @c->gc_lnum;
 658  * o the journal is too large and exceeds size limitations;
 659  * o GC moved indexing LEBs, but they can be used only after the commit;
 660  * o the shrinker fails to find clean znodes to free and requests the commit;
 661  * o etc.
 662  *
 663  * Note, if the file-system is close to be full, this function may return
 664  * %-EAGAIN infinitely, so the caller has to limit amount of re-invocations of
 665  * the function. E.g., this happens if the limits on the journal size are too
 666  * tough and GC writes too much to the journal before an LEB is freed. This
 667  * might also mean that the journal is too large, and the TNC becomes to big,
 668  * so that the shrinker is constantly called, finds not clean znodes to free,
 669  * and requests commit. Well, this may also happen if the journal is all right,
 670  * but another kernel process consumes too much memory. Anyway, infinite
 671  * %-EAGAIN may happen, but in some extreme/misconfiguration cases.
 672  */
 673 int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
 674 {
 675         int i, err, ret, min_space = c->dead_wm;
 676         struct ubifs_lprops lp;
 677         struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
 678 
 679         ubifs_assert_cmt_locked(c);
 680         ubifs_assert(c, !c->ro_media && !c->ro_mount);
 681 
 682         if (ubifs_gc_should_commit(c))
 683                 return -EAGAIN;
 684 
 685         mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
 686 
 687         if (c->ro_error) {
 688                 ret = -EROFS;
 689                 goto out_unlock;
 690         }
 691 
 692         /* We expect the write-buffer to be empty on entry */
 693         ubifs_assert(c, !wbuf->used);
 694 
 695         for (i = 0; ; i++) {
 696                 int space_before, space_after;
 697 
 698                 cond_resched();
 699 
 700                 /* Give the commit an opportunity to run */
 701                 if (ubifs_gc_should_commit(c)) {
 702                         ret = -EAGAIN;
 703                         break;
 704                 }
 705 
 706                 if (i > SOFT_LEBS_LIMIT && !list_empty(&c->idx_gc)) {
 707                         /*
 708                          * We've done enough iterations. Indexing LEBs were
 709                          * moved and will be available after the commit.
 710                          */
 711                         dbg_gc("soft limit, some index LEBs GC'ed, -EAGAIN");
 712                         ubifs_commit_required(c);
 713                         ret = -EAGAIN;
 714                         break;
 715                 }
 716 
 717                 if (i > HARD_LEBS_LIMIT) {
 718                         /*
 719                          * We've moved too many LEBs and have not made
 720                          * progress, give up.
 721                          */
 722                         dbg_gc("hard limit, -ENOSPC");
 723                         ret = -ENOSPC;
 724                         break;
 725                 }
 726 
 727                 /*
 728                  * Empty and freeable LEBs can turn up while we waited for
 729                  * the wbuf lock, or while we have been running GC. In that
 730                  * case, we should just return one of those instead of
 731                  * continuing to GC dirty LEBs. Hence we request
 732                  * 'ubifs_find_dirty_leb()' to return an empty LEB if it can.
 733                  */
 734                 ret = ubifs_find_dirty_leb(c, &lp, min_space, anyway ? 0 : 1);
 735                 if (ret) {
 736                         if (ret == -ENOSPC)
 737                                 dbg_gc("no more dirty LEBs");
 738                         break;
 739                 }
 740 
 741                 dbg_gc("found LEB %d: free %d, dirty %d, sum %d (min. space %d)",
 742                        lp.lnum, lp.free, lp.dirty, lp.free + lp.dirty,
 743                        min_space);
 744 
 745                 space_before = c->leb_size - wbuf->offs - wbuf->used;
 746                 if (wbuf->lnum == -1)
 747                         space_before = 0;
 748 
 749                 ret = ubifs_garbage_collect_leb(c, &lp);
 750                 if (ret < 0) {
 751                         if (ret == -EAGAIN) {
 752                                 /*
 753                                  * This is not error, so we have to return the
 754                                  * LEB to lprops. But if 'ubifs_return_leb()'
 755                                  * fails, its failure code is propagated to the
 756                                  * caller instead of the original '-EAGAIN'.
 757                                  */
 758                                 err = ubifs_return_leb(c, lp.lnum);
 759                                 if (err)
 760                                         ret = err;
 761                                 break;
 762                         }
 763                         goto out;
 764                 }
 765 
 766                 if (ret == LEB_FREED) {
 767                         /* An LEB has been freed and is ready for use */
 768                         dbg_gc("LEB %d freed, return", lp.lnum);
 769                         ret = lp.lnum;
 770                         break;
 771                 }
 772 
 773                 if (ret == LEB_FREED_IDX) {
 774                         /*
 775                          * This was an indexing LEB and it cannot be
 776                          * immediately used. And instead of requesting the
 777                          * commit straight away, we try to garbage collect some
 778                          * more.
 779                          */
 780                         dbg_gc("indexing LEB %d freed, continue", lp.lnum);
 781                         continue;
 782                 }
 783 
 784                 ubifs_assert(c, ret == LEB_RETAINED);
 785                 space_after = c->leb_size - wbuf->offs - wbuf->used;
 786                 dbg_gc("LEB %d retained, freed %d bytes", lp.lnum,
 787                        space_after - space_before);
 788 
 789                 if (space_after > space_before) {
 790                         /* GC makes progress, keep working */
 791                         min_space >>= 1;
 792                         if (min_space < c->dead_wm)
 793                                 min_space = c->dead_wm;
 794                         continue;
 795                 }
 796 
 797                 dbg_gc("did not make progress");
 798 
 799                 /*
 800                  * GC moved an LEB bud have not done any progress. This means
 801                  * that the previous GC head LEB contained too few free space
 802                  * and the LEB which was GC'ed contained only large nodes which
 803                  * did not fit that space.
 804                  *
 805                  * We can do 2 things:
 806                  * 1. pick another LEB in a hope it'll contain a small node
 807                  *    which will fit the space we have at the end of current GC
 808                  *    head LEB, but there is no guarantee, so we try this out
 809                  *    unless we have already been working for too long;
 810                  * 2. request an LEB with more dirty space, which will force
 811                  *    'ubifs_find_dirty_leb()' to start scanning the lprops
 812                  *    table, instead of just picking one from the heap
 813                  *    (previously it already picked the dirtiest LEB).
 814                  */
 815                 if (i < SOFT_LEBS_LIMIT) {
 816                         dbg_gc("try again");
 817                         continue;
 818                 }
 819 
 820                 min_space <<= 1;
 821                 if (min_space > c->dark_wm)
 822                         min_space = c->dark_wm;
 823                 dbg_gc("set min. space to %d", min_space);
 824         }
 825 
 826         if (ret == -ENOSPC && !list_empty(&c->idx_gc)) {
 827                 dbg_gc("no space, some index LEBs GC'ed, -EAGAIN");
 828                 ubifs_commit_required(c);
 829                 ret = -EAGAIN;
 830         }
 831 
 832         err = ubifs_wbuf_sync_nolock(wbuf);
 833         if (!err)
 834                 err = ubifs_leb_unmap(c, c->gc_lnum);
 835         if (err) {
 836                 ret = err;
 837                 goto out;
 838         }
 839 out_unlock:
 840         mutex_unlock(&wbuf->io_mutex);
 841         return ret;
 842 
 843 out:
 844         ubifs_assert(c, ret < 0);
 845         ubifs_assert(c, ret != -ENOSPC && ret != -EAGAIN);
 846         ubifs_wbuf_sync_nolock(wbuf);
 847         ubifs_ro_mode(c, ret);
 848         mutex_unlock(&wbuf->io_mutex);
 849         ubifs_return_leb(c, lp.lnum);
 850         return ret;
 851 }
 852 
 853 /**
 854  * ubifs_gc_start_commit - garbage collection at start of commit.
 855  * @c: UBIFS file-system description object
 856  *
 857  * If a LEB has only dirty and free space, then we may safely unmap it and make
 858  * it free.  Note, we cannot do this with indexing LEBs because dirty space may
 859  * correspond index nodes that are required for recovery.  In that case, the
 860  * LEB cannot be unmapped until after the next commit.
 861  *
 862  * This function returns %0 upon success and a negative error code upon failure.
 863  */
 864 int ubifs_gc_start_commit(struct ubifs_info *c)
 865 {
 866         struct ubifs_gced_idx_leb *idx_gc;
 867         const struct ubifs_lprops *lp;
 868         int err = 0, flags;
 869 
 870         ubifs_get_lprops(c);
 871 
 872         /*
 873          * Unmap (non-index) freeable LEBs. Note that recovery requires that all
 874          * wbufs are sync'd before this, which is done in 'do_commit()'.
 875          */
 876         while (1) {
 877                 lp = ubifs_fast_find_freeable(c);
 878                 if (!lp)
 879                         break;
 880                 ubifs_assert(c, !(lp->flags & LPROPS_TAKEN));
 881                 ubifs_assert(c, !(lp->flags & LPROPS_INDEX));
 882                 err = ubifs_leb_unmap(c, lp->lnum);
 883                 if (err)
 884                         goto out;
 885                 lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0);
 886                 if (IS_ERR(lp)) {
 887                         err = PTR_ERR(lp);
 888                         goto out;
 889                 }
 890                 ubifs_assert(c, !(lp->flags & LPROPS_TAKEN));
 891                 ubifs_assert(c, !(lp->flags & LPROPS_INDEX));
 892         }
 893 
 894         /* Mark GC'd index LEBs OK to unmap after this commit finishes */
 895         list_for_each_entry(idx_gc, &c->idx_gc, list)
 896                 idx_gc->unmap = 1;
 897 
 898         /* Record index freeable LEBs for unmapping after commit */
 899         while (1) {
 900                 lp = ubifs_fast_find_frdi_idx(c);
 901                 if (IS_ERR(lp)) {
 902                         err = PTR_ERR(lp);
 903                         goto out;
 904                 }
 905                 if (!lp)
 906                         break;
 907                 idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
 908                 if (!idx_gc) {
 909                         err = -ENOMEM;
 910                         goto out;
 911                 }
 912                 ubifs_assert(c, !(lp->flags & LPROPS_TAKEN));
 913                 ubifs_assert(c, lp->flags & LPROPS_INDEX);
 914                 /* Don't release the LEB until after the next commit */
 915                 flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX;
 916                 lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1);
 917                 if (IS_ERR(lp)) {
 918                         err = PTR_ERR(lp);
 919                         kfree(idx_gc);
 920                         goto out;
 921                 }
 922                 ubifs_assert(c, lp->flags & LPROPS_TAKEN);
 923                 ubifs_assert(c, !(lp->flags & LPROPS_INDEX));
 924                 idx_gc->lnum = lp->lnum;
 925                 idx_gc->unmap = 1;
 926                 list_add(&idx_gc->list, &c->idx_gc);
 927         }
 928 out:
 929         ubifs_release_lprops(c);
 930         return err;
 931 }
 932 
 933 /**
 934  * ubifs_gc_end_commit - garbage collection at end of commit.
 935  * @c: UBIFS file-system description object
 936  *
 937  * This function completes out-of-place garbage collection of index LEBs.
 938  */
 939 int ubifs_gc_end_commit(struct ubifs_info *c)
 940 {
 941         struct ubifs_gced_idx_leb *idx_gc, *tmp;
 942         struct ubifs_wbuf *wbuf;
 943         int err = 0;
 944 
 945         wbuf = &c->jheads[GCHD].wbuf;
 946         mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
 947         list_for_each_entry_safe(idx_gc, tmp, &c->idx_gc, list)
 948                 if (idx_gc->unmap) {
 949                         dbg_gc("LEB %d", idx_gc->lnum);
 950                         err = ubifs_leb_unmap(c, idx_gc->lnum);
 951                         if (err)
 952                                 goto out;
 953                         err = ubifs_change_one_lp(c, idx_gc->lnum, LPROPS_NC,
 954                                           LPROPS_NC, 0, LPROPS_TAKEN, -1);
 955                         if (err)
 956                                 goto out;
 957                         list_del(&idx_gc->list);
 958                         kfree(idx_gc);
 959                 }
 960 out:
 961         mutex_unlock(&wbuf->io_mutex);
 962         return err;
 963 }
 964 
 965 /**
 966  * ubifs_destroy_idx_gc - destroy idx_gc list.
 967  * @c: UBIFS file-system description object
 968  *
 969  * This function destroys the @c->idx_gc list. It is called when unmounting
 970  * so locks are not needed. Returns zero in case of success and a negative
 971  * error code in case of failure.
 972  */
 973 void ubifs_destroy_idx_gc(struct ubifs_info *c)
 974 {
 975         while (!list_empty(&c->idx_gc)) {
 976                 struct ubifs_gced_idx_leb *idx_gc;
 977 
 978                 idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb,
 979                                     list);
 980                 c->idx_gc_cnt -= 1;
 981                 list_del(&idx_gc->list);
 982                 kfree(idx_gc);
 983         }
 984 }
 985 
 986 /**
 987  * ubifs_get_idx_gc_leb - get a LEB from GC'd index LEB list.
 988  * @c: UBIFS file-system description object
 989  *
 990  * Called during start commit so locks are not needed.
 991  */
 992 int ubifs_get_idx_gc_leb(struct ubifs_info *c)
 993 {
 994         struct ubifs_gced_idx_leb *idx_gc;
 995         int lnum;
 996 
 997         if (list_empty(&c->idx_gc))
 998                 return -ENOSPC;
 999         idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, list);
1000         lnum = idx_gc->lnum;
1001         /* c->idx_gc_cnt is updated by the caller when lprops are updated */
1002         list_del(&idx_gc->list);
1003         kfree(idx_gc);
1004         return lnum;
1005 }

/* [<][>][^][v][top][bottom][index][help] */