root/fs/xfs/libxfs/xfs_ag_resv.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. xfs_ag_resv_critical
  2. xfs_ag_resv_needed
  3. __xfs_ag_resv_free
  4. xfs_ag_resv_free
  5. __xfs_ag_resv_init
  6. xfs_ag_resv_init
  7. xfs_ag_resv_alloc_extent
  8. xfs_ag_resv_free_extent

   1 // SPDX-License-Identifier: GPL-2.0+
   2 /*
   3  * Copyright (C) 2016 Oracle.  All Rights Reserved.
   4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5  */
   6 #include "xfs.h"
   7 #include "xfs_fs.h"
   8 #include "xfs_shared.h"
   9 #include "xfs_format.h"
  10 #include "xfs_log_format.h"
  11 #include "xfs_trans_resv.h"
  12 #include "xfs_mount.h"
  13 #include "xfs_alloc.h"
  14 #include "xfs_errortag.h"
  15 #include "xfs_error.h"
  16 #include "xfs_trace.h"
  17 #include "xfs_trans.h"
  18 #include "xfs_rmap_btree.h"
  19 #include "xfs_btree.h"
  20 #include "xfs_refcount_btree.h"
  21 #include "xfs_ialloc_btree.h"
  22 
  23 /*
  24  * Per-AG Block Reservations
  25  *
  26  * For some kinds of allocation group metadata structures, it is advantageous
  27  * to reserve a small number of blocks in each AG so that future expansions of
  28  * that data structure do not encounter ENOSPC because errors during a btree
  29  * split cause the filesystem to go offline.
  30  *
  31  * Prior to the introduction of reflink, this wasn't an issue because the free
  32  * space btrees maintain a reserve of space (the AGFL) to handle any expansion
  33  * that may be necessary; and allocations of other metadata (inodes, BMBT,
  34  * dir/attr) aren't restricted to a single AG.  However, with reflink it is
  35  * possible to allocate all the space in an AG, have subsequent reflink/CoW
  36  * activity expand the refcount btree, and discover that there's no space left
  37  * to handle that expansion.  Since we can calculate the maximum size of the
  38  * refcount btree, we can reserve space for it and avoid ENOSPC.
  39  *
  40  * Handling per-AG reservations consists of four changes to the allocator's
  41  * behavior:  First, because these reservations are always needed, we decrease
  42  * the ag_max_usable counter to reflect the size of the AG after the reserved
  43  * blocks are taken.  Second, the reservations must be reflected in the
  44  * fdblocks count to maintain proper accounting.  Third, each AG must maintain
  45  * its own reserved block counter so that we can calculate the amount of space
  46  * that must remain free to maintain the reservations.  Fourth, the "remaining
  47  * reserved blocks" count must be used when calculating the length of the
  48  * longest free extent in an AG and to clamp maxlen in the per-AG allocation
  49  * functions.  In other words, we maintain a virtual allocation via in-core
  50  * accounting tricks so that we don't have to clean up after a crash. :)
  51  *
  52  * Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
  53  * values via struct xfs_alloc_arg or directly to the xfs_free_extent
  54  * function.  It might seem a little funny to maintain a reservoir of blocks
  55  * to feed another reservoir, but the AGFL only holds enough blocks to get
  56  * through the next transaction.  The per-AG reservation is to ensure (we
  57  * hope) that each AG never runs out of blocks.  Each data structure wanting
  58  * to use the reservation system should update ask/used in xfs_ag_resv_init.
  59  */
  60 
  61 /*
  62  * Are we critically low on blocks?  For now we'll define that as the number
  63  * of blocks we can get our hands on being less than 10% of what we reserved
  64  * or less than some arbitrary number (maximum btree height).
  65  */
  66 bool
  67 xfs_ag_resv_critical(
  68         struct xfs_perag                *pag,
  69         enum xfs_ag_resv_type           type)
  70 {
  71         xfs_extlen_t                    avail;
  72         xfs_extlen_t                    orig;
  73 
  74         switch (type) {
  75         case XFS_AG_RESV_METADATA:
  76                 avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
  77                 orig = pag->pag_meta_resv.ar_asked;
  78                 break;
  79         case XFS_AG_RESV_RMAPBT:
  80                 avail = pag->pagf_freeblks + pag->pagf_flcount -
  81                         pag->pag_meta_resv.ar_reserved;
  82                 orig = pag->pag_rmapbt_resv.ar_asked;
  83                 break;
  84         default:
  85                 ASSERT(0);
  86                 return false;
  87         }
  88 
  89         trace_xfs_ag_resv_critical(pag, type, avail);
  90 
  91         /* Critically low if less than 10% or max btree height remains. */
  92         return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS,
  93                         pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
  94 }
  95 
  96 /*
  97  * How many blocks are reserved but not used, and therefore must not be
  98  * allocated away?
  99  */
 100 xfs_extlen_t
 101 xfs_ag_resv_needed(
 102         struct xfs_perag                *pag,
 103         enum xfs_ag_resv_type           type)
 104 {
 105         xfs_extlen_t                    len;
 106 
 107         len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved;
 108         switch (type) {
 109         case XFS_AG_RESV_METADATA:
 110         case XFS_AG_RESV_RMAPBT:
 111                 len -= xfs_perag_resv(pag, type)->ar_reserved;
 112                 break;
 113         case XFS_AG_RESV_NONE:
 114                 /* empty */
 115                 break;
 116         default:
 117                 ASSERT(0);
 118         }
 119 
 120         trace_xfs_ag_resv_needed(pag, type, len);
 121 
 122         return len;
 123 }
 124 
 125 /* Clean out a reservation */
 126 static int
 127 __xfs_ag_resv_free(
 128         struct xfs_perag                *pag,
 129         enum xfs_ag_resv_type           type)
 130 {
 131         struct xfs_ag_resv              *resv;
 132         xfs_extlen_t                    oldresv;
 133         int                             error;
 134 
 135         trace_xfs_ag_resv_free(pag, type, 0);
 136 
 137         resv = xfs_perag_resv(pag, type);
 138         if (pag->pag_agno == 0)
 139                 pag->pag_mount->m_ag_max_usable += resv->ar_asked;
 140         /*
 141          * RMAPBT blocks come from the AGFL and AGFL blocks are always
 142          * considered "free", so whatever was reserved at mount time must be
 143          * given back at umount.
 144          */
 145         if (type == XFS_AG_RESV_RMAPBT)
 146                 oldresv = resv->ar_orig_reserved;
 147         else
 148                 oldresv = resv->ar_reserved;
 149         error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
 150         resv->ar_reserved = 0;
 151         resv->ar_asked = 0;
 152         resv->ar_orig_reserved = 0;
 153 
 154         if (error)
 155                 trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
 156                                 error, _RET_IP_);
 157         return error;
 158 }
 159 
 160 /* Free a per-AG reservation. */
 161 int
 162 xfs_ag_resv_free(
 163         struct xfs_perag                *pag)
 164 {
 165         int                             error;
 166         int                             err2;
 167 
 168         error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
 169         err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
 170         if (err2 && !error)
 171                 error = err2;
 172         return error;
 173 }
 174 
 175 static int
 176 __xfs_ag_resv_init(
 177         struct xfs_perag                *pag,
 178         enum xfs_ag_resv_type           type,
 179         xfs_extlen_t                    ask,
 180         xfs_extlen_t                    used)
 181 {
 182         struct xfs_mount                *mp = pag->pag_mount;
 183         struct xfs_ag_resv              *resv;
 184         int                             error;
 185         xfs_extlen_t                    hidden_space;
 186 
 187         if (used > ask)
 188                 ask = used;
 189 
 190         switch (type) {
 191         case XFS_AG_RESV_RMAPBT:
 192                 /*
 193                  * Space taken by the rmapbt is not subtracted from fdblocks
 194                  * because the rmapbt lives in the free space.  Here we must
 195                  * subtract the entire reservation from fdblocks so that we
 196                  * always have blocks available for rmapbt expansion.
 197                  */
 198                 hidden_space = ask;
 199                 break;
 200         case XFS_AG_RESV_METADATA:
 201                 /*
 202                  * Space taken by all other metadata btrees are accounted
 203                  * on-disk as used space.  We therefore only hide the space
 204                  * that is reserved but not used by the trees.
 205                  */
 206                 hidden_space = ask - used;
 207                 break;
 208         default:
 209                 ASSERT(0);
 210                 return -EINVAL;
 211         }
 212         error = xfs_mod_fdblocks(mp, -(int64_t)hidden_space, true);
 213         if (error) {
 214                 trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
 215                                 error, _RET_IP_);
 216                 xfs_warn(mp,
 217 "Per-AG reservation for AG %u failed.  Filesystem may run out of space.",
 218                                 pag->pag_agno);
 219                 return error;
 220         }
 221 
 222         /*
 223          * Reduce the maximum per-AG allocation length by however much we're
 224          * trying to reserve for an AG.  Since this is a filesystem-wide
 225          * counter, we only make the adjustment for AG 0.  This assumes that
 226          * there aren't any AGs hungrier for per-AG reservation than AG 0.
 227          */
 228         if (pag->pag_agno == 0)
 229                 mp->m_ag_max_usable -= ask;
 230 
 231         resv = xfs_perag_resv(pag, type);
 232         resv->ar_asked = ask;
 233         resv->ar_orig_reserved = hidden_space;
 234         resv->ar_reserved = ask - used;
 235 
 236         trace_xfs_ag_resv_init(pag, type, ask);
 237         return 0;
 238 }
 239 
 240 /* Create a per-AG block reservation. */
 241 int
 242 xfs_ag_resv_init(
 243         struct xfs_perag                *pag,
 244         struct xfs_trans                *tp)
 245 {
 246         struct xfs_mount                *mp = pag->pag_mount;
 247         xfs_agnumber_t                  agno = pag->pag_agno;
 248         xfs_extlen_t                    ask;
 249         xfs_extlen_t                    used;
 250         int                             error = 0;
 251 
 252         /* Create the metadata reservation. */
 253         if (pag->pag_meta_resv.ar_asked == 0) {
 254                 ask = used = 0;
 255 
 256                 error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask, &used);
 257                 if (error)
 258                         goto out;
 259 
 260                 error = xfs_finobt_calc_reserves(mp, tp, agno, &ask, &used);
 261                 if (error)
 262                         goto out;
 263 
 264                 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
 265                                 ask, used);
 266                 if (error) {
 267                         /*
 268                          * Because we didn't have per-AG reservations when the
 269                          * finobt feature was added we might not be able to
 270                          * reserve all needed blocks.  Warn and fall back to the
 271                          * old and potentially buggy code in that case, but
 272                          * ensure we do have the reservation for the refcountbt.
 273                          */
 274                         ask = used = 0;
 275 
 276                         mp->m_finobt_nores = true;
 277 
 278                         error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask,
 279                                         &used);
 280                         if (error)
 281                                 goto out;
 282 
 283                         error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
 284                                         ask, used);
 285                         if (error)
 286                                 goto out;
 287                 }
 288         }
 289 
 290         /* Create the RMAPBT metadata reservation */
 291         if (pag->pag_rmapbt_resv.ar_asked == 0) {
 292                 ask = used = 0;
 293 
 294                 error = xfs_rmapbt_calc_reserves(mp, tp, agno, &ask, &used);
 295                 if (error)
 296                         goto out;
 297 
 298                 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used);
 299                 if (error)
 300                         goto out;
 301         }
 302 
 303 #ifdef DEBUG
 304         /* need to read in the AGF for the ASSERT below to work */
 305         error = xfs_alloc_pagf_init(pag->pag_mount, tp, pag->pag_agno, 0);
 306         if (error)
 307                 return error;
 308 
 309         ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
 310                xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
 311                pag->pagf_freeblks + pag->pagf_flcount);
 312 #endif
 313 out:
 314         return error;
 315 }
 316 
 317 /* Allocate a block from the reservation. */
 318 void
 319 xfs_ag_resv_alloc_extent(
 320         struct xfs_perag                *pag,
 321         enum xfs_ag_resv_type           type,
 322         struct xfs_alloc_arg            *args)
 323 {
 324         struct xfs_ag_resv              *resv;
 325         xfs_extlen_t                    len;
 326         uint                            field;
 327 
 328         trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
 329 
 330         switch (type) {
 331         case XFS_AG_RESV_AGFL:
 332                 return;
 333         case XFS_AG_RESV_METADATA:
 334         case XFS_AG_RESV_RMAPBT:
 335                 resv = xfs_perag_resv(pag, type);
 336                 break;
 337         default:
 338                 ASSERT(0);
 339                 /* fall through */
 340         case XFS_AG_RESV_NONE:
 341                 field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
 342                                        XFS_TRANS_SB_FDBLOCKS;
 343                 xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len);
 344                 return;
 345         }
 346 
 347         len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
 348         resv->ar_reserved -= len;
 349         if (type == XFS_AG_RESV_RMAPBT)
 350                 return;
 351         /* Allocations of reserved blocks only need on-disk sb updates... */
 352         xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
 353         /* ...but non-reserved blocks need in-core and on-disk updates. */
 354         if (args->len > len)
 355                 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS,
 356                                 -((int64_t)args->len - len));
 357 }
 358 
 359 /* Free a block to the reservation. */
 360 void
 361 xfs_ag_resv_free_extent(
 362         struct xfs_perag                *pag,
 363         enum xfs_ag_resv_type           type,
 364         struct xfs_trans                *tp,
 365         xfs_extlen_t                    len)
 366 {
 367         xfs_extlen_t                    leftover;
 368         struct xfs_ag_resv              *resv;
 369 
 370         trace_xfs_ag_resv_free_extent(pag, type, len);
 371 
 372         switch (type) {
 373         case XFS_AG_RESV_AGFL:
 374                 return;
 375         case XFS_AG_RESV_METADATA:
 376         case XFS_AG_RESV_RMAPBT:
 377                 resv = xfs_perag_resv(pag, type);
 378                 break;
 379         default:
 380                 ASSERT(0);
 381                 /* fall through */
 382         case XFS_AG_RESV_NONE:
 383                 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
 384                 return;
 385         }
 386 
 387         leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
 388         resv->ar_reserved += leftover;
 389         if (type == XFS_AG_RESV_RMAPBT)
 390                 return;
 391         /* Freeing into the reserved pool only requires on-disk update... */
 392         xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
 393         /* ...but freeing beyond that requires in-core and on-disk update. */
 394         if (len > leftover)
 395                 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover);
 396 }

/* [<][>][^][v][top][bottom][index][help] */