root/fs/xfs/libxfs/xfs_alloc.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. xfs_agfl_size
  2. xfs_refc_block
  3. xfs_prealloc_blocks
  4. xfs_alloc_set_aside
  5. xfs_alloc_ag_max_usable
  6. xfs_alloc_lookup_eq
  7. xfs_alloc_lookup_ge
  8. xfs_alloc_lookup_le
  9. xfs_alloc_update
  10. xfs_alloc_get_rec
  11. xfs_alloc_compute_aligned
  12. xfs_alloc_compute_diff
  13. xfs_alloc_fix_len
  14. xfs_alloc_fixup_trees
  15. xfs_agfl_verify
  16. xfs_agfl_read_verify
  17. xfs_agfl_write_verify
  18. xfs_alloc_read_agfl
  19. xfs_alloc_update_counters
  20. xfs_alloc_ag_vextent_small
  21. xfs_alloc_ag_vextent
  22. xfs_alloc_ag_vextent_exact
  23. xfs_alloc_find_best_extent
  24. xfs_alloc_ag_vextent_near
  25. xfs_alloc_ag_vextent_size
  26. xfs_free_ag_extent
  27. xfs_alloc_compute_maxlevels
  28. xfs_alloc_longest_free_extent
  29. xfs_alloc_min_freelist
  30. xfs_alloc_space_available
  31. xfs_free_agfl_block
  32. xfs_agfl_needs_reset
  33. xfs_agfl_reset
  34. xfs_defer_agfl_block
  35. xfs_alloc_fix_freelist
  36. xfs_alloc_get_freelist
  37. xfs_alloc_log_agf
  38. xfs_alloc_pagf_init
  39. xfs_alloc_put_freelist
  40. xfs_agf_verify
  41. xfs_agf_read_verify
  42. xfs_agf_write_verify
  43. xfs_read_agf
  44. xfs_alloc_read_agf
  45. xfs_alloc_vextent
  46. xfs_free_extent_fix_freelist
  47. __xfs_free_extent
  48. xfs_alloc_query_range_helper
  49. xfs_alloc_query_range
  50. xfs_alloc_query_all
  51. xfs_alloc_has_record
  52. xfs_agfl_walk

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6 #include "xfs.h"
   7 #include "xfs_fs.h"
   8 #include "xfs_format.h"
   9 #include "xfs_log_format.h"
  10 #include "xfs_shared.h"
  11 #include "xfs_trans_resv.h"
  12 #include "xfs_bit.h"
  13 #include "xfs_sb.h"
  14 #include "xfs_mount.h"
  15 #include "xfs_defer.h"
  16 #include "xfs_btree.h"
  17 #include "xfs_rmap.h"
  18 #include "xfs_alloc_btree.h"
  19 #include "xfs_alloc.h"
  20 #include "xfs_extent_busy.h"
  21 #include "xfs_errortag.h"
  22 #include "xfs_error.h"
  23 #include "xfs_trace.h"
  24 #include "xfs_trans.h"
  25 #include "xfs_buf_item.h"
  26 #include "xfs_log.h"
  27 #include "xfs_ag_resv.h"
  28 #include "xfs_bmap.h"
  29 
  30 extern kmem_zone_t      *xfs_bmap_free_item_zone;
  31 
  32 struct workqueue_struct *xfs_alloc_wq;
  33 
  34 #define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
  35 
  36 #define XFSA_FIXUP_BNO_OK       1
  37 #define XFSA_FIXUP_CNT_OK       2
  38 
  39 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
  40 STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
  41 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
  42 
  43 /*
  44  * Size of the AGFL.  For CRC-enabled filesystes we steal a couple of slots in
  45  * the beginning of the block for a proper header with the location information
  46  * and CRC.
  47  */
  48 unsigned int
  49 xfs_agfl_size(
  50         struct xfs_mount        *mp)
  51 {
  52         unsigned int            size = mp->m_sb.sb_sectsize;
  53 
  54         if (xfs_sb_version_hascrc(&mp->m_sb))
  55                 size -= sizeof(struct xfs_agfl);
  56 
  57         return size / sizeof(xfs_agblock_t);
  58 }
  59 
  60 unsigned int
  61 xfs_refc_block(
  62         struct xfs_mount        *mp)
  63 {
  64         if (xfs_sb_version_hasrmapbt(&mp->m_sb))
  65                 return XFS_RMAP_BLOCK(mp) + 1;
  66         if (xfs_sb_version_hasfinobt(&mp->m_sb))
  67                 return XFS_FIBT_BLOCK(mp) + 1;
  68         return XFS_IBT_BLOCK(mp) + 1;
  69 }
  70 
  71 xfs_extlen_t
  72 xfs_prealloc_blocks(
  73         struct xfs_mount        *mp)
  74 {
  75         if (xfs_sb_version_hasreflink(&mp->m_sb))
  76                 return xfs_refc_block(mp) + 1;
  77         if (xfs_sb_version_hasrmapbt(&mp->m_sb))
  78                 return XFS_RMAP_BLOCK(mp) + 1;
  79         if (xfs_sb_version_hasfinobt(&mp->m_sb))
  80                 return XFS_FIBT_BLOCK(mp) + 1;
  81         return XFS_IBT_BLOCK(mp) + 1;
  82 }
  83 
  84 /*
  85  * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
  86  * AGF buffer (PV 947395), we place constraints on the relationship among
  87  * actual allocations for data blocks, freelist blocks, and potential file data
  88  * bmap btree blocks. However, these restrictions may result in no actual space
  89  * allocated for a delayed extent, for example, a data block in a certain AG is
  90  * allocated but there is no additional block for the additional bmap btree
  91  * block due to a split of the bmap btree of the file. The result of this may
  92  * lead to an infinite loop when the file gets flushed to disk and all delayed
  93  * extents need to be actually allocated. To get around this, we explicitly set
  94  * aside a few blocks which will not be reserved in delayed allocation.
  95  *
  96  * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
  97  * potential split of the file's bmap btree.
  98  */
  99 unsigned int
 100 xfs_alloc_set_aside(
 101         struct xfs_mount        *mp)
 102 {
 103         return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
 104 }
 105 
 106 /*
 107  * When deciding how much space to allocate out of an AG, we limit the
 108  * allocation maximum size to the size the AG. However, we cannot use all the
 109  * blocks in the AG - some are permanently used by metadata. These
 110  * blocks are generally:
 111  *      - the AG superblock, AGF, AGI and AGFL
 112  *      - the AGF (bno and cnt) and AGI btree root blocks, and optionally
 113  *        the AGI free inode and rmap btree root blocks.
 114  *      - blocks on the AGFL according to xfs_alloc_set_aside() limits
 115  *      - the rmapbt root block
 116  *
 117  * The AG headers are sector sized, so the amount of space they take up is
 118  * dependent on filesystem geometry. The others are all single blocks.
 119  */
 120 unsigned int
 121 xfs_alloc_ag_max_usable(
 122         struct xfs_mount        *mp)
 123 {
 124         unsigned int            blocks;
 125 
 126         blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
 127         blocks += XFS_ALLOC_AGFL_RESERVE;
 128         blocks += 3;                    /* AGF, AGI btree root blocks */
 129         if (xfs_sb_version_hasfinobt(&mp->m_sb))
 130                 blocks++;               /* finobt root block */
 131         if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 132                 blocks++;               /* rmap root block */
 133         if (xfs_sb_version_hasreflink(&mp->m_sb))
 134                 blocks++;               /* refcount root block */
 135 
 136         return mp->m_sb.sb_agblocks - blocks;
 137 }
 138 
 139 /*
 140  * Lookup the record equal to [bno, len] in the btree given by cur.
 141  */
 142 STATIC int                              /* error */
 143 xfs_alloc_lookup_eq(
 144         struct xfs_btree_cur    *cur,   /* btree cursor */
 145         xfs_agblock_t           bno,    /* starting block of extent */
 146         xfs_extlen_t            len,    /* length of extent */
 147         int                     *stat)  /* success/failure */
 148 {
 149         cur->bc_rec.a.ar_startblock = bno;
 150         cur->bc_rec.a.ar_blockcount = len;
 151         return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
 152 }
 153 
 154 /*
 155  * Lookup the first record greater than or equal to [bno, len]
 156  * in the btree given by cur.
 157  */
 158 int                             /* error */
 159 xfs_alloc_lookup_ge(
 160         struct xfs_btree_cur    *cur,   /* btree cursor */
 161         xfs_agblock_t           bno,    /* starting block of extent */
 162         xfs_extlen_t            len,    /* length of extent */
 163         int                     *stat)  /* success/failure */
 164 {
 165         cur->bc_rec.a.ar_startblock = bno;
 166         cur->bc_rec.a.ar_blockcount = len;
 167         return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 168 }
 169 
 170 /*
 171  * Lookup the first record less than or equal to [bno, len]
 172  * in the btree given by cur.
 173  */
 174 int                                     /* error */
 175 xfs_alloc_lookup_le(
 176         struct xfs_btree_cur    *cur,   /* btree cursor */
 177         xfs_agblock_t           bno,    /* starting block of extent */
 178         xfs_extlen_t            len,    /* length of extent */
 179         int                     *stat)  /* success/failure */
 180 {
 181         cur->bc_rec.a.ar_startblock = bno;
 182         cur->bc_rec.a.ar_blockcount = len;
 183         return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
 184 }
 185 
 186 /*
 187  * Update the record referred to by cur to the value given
 188  * by [bno, len].
 189  * This either works (return 0) or gets an EFSCORRUPTED error.
 190  */
 191 STATIC int                              /* error */
 192 xfs_alloc_update(
 193         struct xfs_btree_cur    *cur,   /* btree cursor */
 194         xfs_agblock_t           bno,    /* starting block of extent */
 195         xfs_extlen_t            len)    /* length of extent */
 196 {
 197         union xfs_btree_rec     rec;
 198 
 199         rec.alloc.ar_startblock = cpu_to_be32(bno);
 200         rec.alloc.ar_blockcount = cpu_to_be32(len);
 201         return xfs_btree_update(cur, &rec);
 202 }
 203 
  204 /*
  205  * Get the data from the pointed-to record.
  206  */
  207 int                                     /* error */
  208 xfs_alloc_get_rec(
  209         struct xfs_btree_cur    *cur,   /* btree cursor */
  210         xfs_agblock_t           *bno,   /* output: starting block of extent */
  211         xfs_extlen_t            *len,   /* output: length of extent */
  212         int                     *stat)  /* output: success/failure */
  213 {
  214         struct xfs_mount        *mp = cur->bc_mp;
  215         xfs_agnumber_t          agno = cur->bc_private.a.agno;
  216         union xfs_btree_rec     *rec;
  217         int                     error;
  218 
              /* nothing to decode if the cursor isn't on a valid record */
  219         error = xfs_btree_get_rec(cur, &rec, stat);
  220         if (error || !(*stat))
  221                 return error;
  222 
  223         *bno = be32_to_cpu(rec->alloc.ar_startblock);
  224         *len = be32_to_cpu(rec->alloc.ar_blockcount);
  225 
              /* a zero-length freespace extent is never valid */
  226         if (*len == 0)
  227                 goto out_bad_rec;
  228 
  229         /* check for valid extent range, including overflow */
  230         if (!xfs_verify_agbno(mp, agno, *bno))
  231                 goto out_bad_rec;
              /* bno + len wrapping past zero means a corrupt length */
  232         if (*bno > *bno + *len)
  233                 goto out_bad_rec;
  234         if (!xfs_verify_agbno(mp, agno, *bno + *len - 1))
  235                 goto out_bad_rec;
  236 
  237         return 0;
  238 
  239 out_bad_rec:
              /* report which of the two freespace btrees is corrupt */
  240         xfs_warn(mp,
  241                 "%s Freespace BTree record corruption in AG %d detected!",
  242                 cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno);
  243         xfs_warn(mp,
  244                 "start block 0x%x block count 0x%x", *bno, *len);
  245         return -EFSCORRUPTED;
  246 }
 247 
  248 /*
  249  * Compute aligned version of the found extent.
  250  * Takes alignment and min length into account.
       * Returns the busy state reported by xfs_extent_busy_trim(), so the
       * caller knows whether busy space was trimmed out of the extent.
  251  */
  252 STATIC bool
  253 xfs_alloc_compute_aligned(
  254         xfs_alloc_arg_t *args,          /* allocation argument structure */
  255         xfs_agblock_t   foundbno,       /* starting block in found extent */
  256         xfs_extlen_t    foundlen,       /* length in found extent */
  257         xfs_agblock_t   *resbno,        /* result block number */
  258         xfs_extlen_t    *reslen,        /* result length */
  259         unsigned        *busy_gen)      /* out: gen from xfs_extent_busy_trim() */
  260 {
  261         xfs_agblock_t   bno = foundbno;
  262         xfs_extlen_t    len = foundlen;
  263         xfs_extlen_t    diff;
  264         bool            busy;
  265 
  266         /* Trim busy sections out of found extent */
  267         busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
  268 
  269         /*
  270          * If we have a largish extent that happens to start before min_agbno,
  271          * see if we can shift it into range...
  272          */
  273         if (bno < args->min_agbno && bno + len > args->min_agbno) {
  274                 diff = args->min_agbno - bno;
  275                 if (len > diff) {
  276                         bno += diff;
  277                         len -= diff;
  278                 }
  279         }
  280 
              /*
               * Round the start up to the requested alignment; if the round-up
               * consumes the whole extent, the result length is zero and the
               * caller will reject the extent.
               */
  281         if (args->alignment > 1 && len >= args->minlen) {
  282                 xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
  283 
  284                 diff = aligned_bno - bno;
  285 
  286                 *resbno = aligned_bno;
  287                 *reslen = diff >= len ? 0 : len - diff;
  288         } else {
  289                 *resbno = bno;
  290                 *reslen = len;
  291         }
  292 
  293         return busy;
  294 }
 295 
  296 /*
  297  * Compute best start block and diff for "near" allocations.
  298  * freelen >= wantlen already checked by caller.
       * Returns the absolute distance of the chosen start block from
       * wantbno, or 0 when no usable start block exists (NULLAGBLOCK).
  299  */
  300 STATIC xfs_extlen_t                     /* difference value (absolute) */
  301 xfs_alloc_compute_diff(
  302         xfs_agblock_t   wantbno,        /* target starting block */
  303         xfs_extlen_t    wantlen,        /* target length */
  304         xfs_extlen_t    alignment,      /* target alignment */
  305         int             datatype,       /* are we allocating data? */
  306         xfs_agblock_t   freebno,        /* freespace's starting block */
  307         xfs_extlen_t    freelen,        /* freespace's length */
  308         xfs_agblock_t   *newbnop)       /* result: best start block from free */
  309 {
  310         xfs_agblock_t   freeend;        /* end of freespace extent */
  311         xfs_agblock_t   newbno1;        /* return block number */
  312         xfs_agblock_t   newbno2;        /* other new block number */
  313         xfs_extlen_t    newlen1=0;      /* length with newbno1 */
  314         xfs_extlen_t    newlen2=0;      /* length with newbno2 */
  315         xfs_agblock_t   wantend;        /* end of target extent */
  316         bool            userdata = xfs_alloc_is_userdata(datatype);
  317 
  318         ASSERT(freelen >= wantlen);
  319         freeend = freebno + freelen;
  320         wantend = wantbno + wantlen;
  321         /*
  322          * We want to allocate from the start of a free extent if it is past
  323          * the desired block or if we are allocating user data and the free
  324          * extent is before desired block. The second case is there to allow
  325          * for contiguous allocation from the remaining free space if the file
  326          * grows in the short term.
  327          */
  328         if (freebno >= wantbno || (userdata && freeend < wantend)) {
                      /* aligned start of the free extent, if it still fits */
  329                 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
  330                         newbno1 = NULLAGBLOCK;
  331         } else if (freeend >= wantend && alignment > 1) {
                      /*
                       * The request fits around wantbno.  Try the aligned
                       * candidates on either side of wantbno and keep whichever
                       * yields the longer usable length, breaking ties by
                       * distance to wantbno.
                       */
  332                 newbno1 = roundup(wantbno, alignment);
  333                 newbno2 = newbno1 - alignment;
  334                 if (newbno1 >= freeend)
  335                         newbno1 = NULLAGBLOCK;
  336                 else
  337                         newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
  338                 if (newbno2 < freebno)
  339                         newbno2 = NULLAGBLOCK;
  340                 else
  341                         newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
  342                 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
  343                         if (newlen1 < newlen2 ||
  344                             (newlen1 == newlen2 &&
  345                              XFS_ABSDIFF(newbno1, wantbno) >
  346                              XFS_ABSDIFF(newbno2, wantbno)))
  347                                 newbno1 = newbno2;
  348                 } else if (newbno2 != NULLAGBLOCK)
  349                         newbno1 = newbno2;
  350         } else if (freeend >= wantend) {
                      /* no alignment constraint: start exactly at wantbno */
  351                 newbno1 = wantbno;
  352         } else if (alignment > 1) {
                      /*
                       * Free extent ends before wantend; take the aligned start
                       * closest to the end that still leaves wantlen blocks.
                       */
  353                 newbno1 = roundup(freeend - wantlen, alignment);
  354                 if (newbno1 > freeend - wantlen &&
  355                     newbno1 - alignment >= freebno)
  356                         newbno1 -= alignment;
  357                 else if (newbno1 >= freeend)
  358                         newbno1 = NULLAGBLOCK;
  359         } else
                      /* unaligned: take the tail of the free extent */
  360                 newbno1 = freeend - wantlen;
  361         *newbnop = newbno1;
  362         return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
  363 }
 364 
  365 /*
  366  * Fix up the length, based on mod and prod.
  367  * len should be k * prod + mod for some k.
  368  * If len is too small it is returned unchanged.
  369  * If len hits maxlen it is left alone.
  370  */
  371 STATIC void
  372 xfs_alloc_fix_len(
  373         xfs_alloc_arg_t *args)          /* allocation argument structure */
  374 {
  375         xfs_extlen_t    k;
  376         xfs_extlen_t    rlen;
  377 
  378         ASSERT(args->mod < args->prod);
  379         rlen = args->len;
  380         ASSERT(rlen >= args->minlen);
  381         ASSERT(rlen <= args->maxlen);
              /* nothing to adjust if prod is trivial, rlen is too small or at max */
  382         if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
  383             (args->mod == 0 && rlen < args->prod))
  384                 return;
              /* k is rlen's remainder modulo prod; we want it to equal mod */
  385         k = rlen % args->prod;
  386         if (k == args->mod)
  387                 return;
              /* shrink rlen to the largest value <= rlen with rlen % prod == mod */
  388         if (k > args->mod)
  389                 rlen = rlen - (k - args->mod);
  390         else
  391                 rlen = rlen - args->prod + (args->mod - k);
  392         /* casts to (int) catch length underflows */
  393         if ((int)rlen < (int)args->minlen)
  394                 return;
  395         ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
  396         ASSERT(rlen % args->prod == args->mod);
  397         ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
  398                 rlen + args->minleft);
  399         args->len = rlen;
  400 }
 401 
  402 /*
  403  * Update the two btrees, logically removing from freespace the extent
  404  * starting at rbno, rlen blocks.  The extent is contained within the
  405  * actual (current) free extent fbno for flen blocks.
  406  * Flags are passed in indicating whether the cursors are set to the
  407  * relevant records.
       * Returns 0 on success or a negative errno (including -EFSCORRUPTED
       * from the XFS_WANT_CORRUPTED_RETURN consistency checks).
  408  */
  409 STATIC int                              /* error code */
  410 xfs_alloc_fixup_trees(
  411         xfs_btree_cur_t *cnt_cur,       /* cursor for by-size btree */
  412         xfs_btree_cur_t *bno_cur,       /* cursor for by-block btree */
  413         xfs_agblock_t   fbno,           /* starting block of free extent */
  414         xfs_extlen_t    flen,           /* length of free extent */
  415         xfs_agblock_t   rbno,           /* starting block of returned extent */
  416         xfs_extlen_t    rlen,           /* length of returned extent */
  417         int             flags)          /* flags, XFSA_FIXUP_... */
  418 {
  419         int             error;          /* error code */
  420         int             i;              /* operation results */
  421         xfs_agblock_t   nfbno1;         /* first new free startblock */
  422         xfs_agblock_t   nfbno2;         /* second new free startblock */
  423         xfs_extlen_t    nflen1=0;       /* first new free length */
  424         xfs_extlen_t    nflen2=0;       /* second new free length */
  425         struct xfs_mount *mp;
  426 
  427         mp = cnt_cur->bc_mp;
  428 
  429         /*
  430          * Look up the record in the by-size tree if necessary.
  431          */
  432         if (flags & XFSA_FIXUP_CNT_OK) {
  433 #ifdef DEBUG
                      /* cursor claimed in position: verify it really is */
  434                 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
  435                         return error;
  436                 XFS_WANT_CORRUPTED_RETURN(mp,
  437                         i == 1 && nfbno1 == fbno && nflen1 == flen);
  438 #endif
  439         } else {
  440                 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
  441                         return error;
  442                 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
  443         }
  444         /*
  445          * Look up the record in the by-block tree if necessary.
  446          */
  447         if (flags & XFSA_FIXUP_BNO_OK) {
  448 #ifdef DEBUG
  449                 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
  450                         return error;
  451                 XFS_WANT_CORRUPTED_RETURN(mp,
  452                         i == 1 && nfbno1 == fbno && nflen1 == flen);
  453 #endif
  454         } else {
  455                 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
  456                         return error;
  457                 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
  458         }
  459 
  460 #ifdef DEBUG
              /*
               * If both trees are a single leaf block they track the same set
               * of extents and so must hold the same number of records.
               */
  461         if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
  462                 struct xfs_btree_block  *bnoblock;
  463                 struct xfs_btree_block  *cntblock;
  464 
  465                 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
  466                 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
  467 
  468                 XFS_WANT_CORRUPTED_RETURN(mp,
  469                         bnoblock->bb_numrecs == cntblock->bb_numrecs);
  470         }
  471 #endif
  472 
  473         /*
  474          * Deal with all four cases: the allocated record is contained
  475          * within the freespace record, so we can have new freespace
  476          * at either (or both) end, or no freespace remaining.
  477          */
  478         if (rbno == fbno && rlen == flen)
  479                 nfbno1 = nfbno2 = NULLAGBLOCK;
  480         else if (rbno == fbno) {
  481                 nfbno1 = rbno + rlen;
  482                 nflen1 = flen - rlen;
  483                 nfbno2 = NULLAGBLOCK;
  484         } else if (rbno + rlen == fbno + flen) {
  485                 nfbno1 = fbno;
  486                 nflen1 = flen - rlen;
  487                 nfbno2 = NULLAGBLOCK;
  488         } else {
  489                 nfbno1 = fbno;
  490                 nflen1 = rbno - fbno;
  491                 nfbno2 = rbno + rlen;
  492                 nflen2 = (fbno + flen) - nfbno2;
  493         }
  494         /*
  495          * Delete the entry from the by-size btree.
  496          */
  497         if ((error = xfs_btree_delete(cnt_cur, &i)))
  498                 return error;
  499         XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
  500         /*
  501          * Add new by-size btree entry(s).
  502          */
  503         if (nfbno1 != NULLAGBLOCK) {
  504                 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
  505                         return error;
                      /* the new record must not already exist */
  506                 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
  507                 if ((error = xfs_btree_insert(cnt_cur, &i)))
  508                         return error;
  509                 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
  510         }
  511         if (nfbno2 != NULLAGBLOCK) {
  512                 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
  513                         return error;
  514                 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
  515                 if ((error = xfs_btree_insert(cnt_cur, &i)))
  516                         return error;
  517                 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
  518         }
  519         /*
  520          * Fix up the by-block btree entry(s).
  521          */
  522         if (nfbno1 == NULLAGBLOCK) {
  523                 /*
  524                  * No remaining freespace, just delete the by-block tree entry.
  525                  */
  526                 if ((error = xfs_btree_delete(bno_cur, &i)))
  527                         return error;
  528                 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
  529         } else {
  530                 /*
  531                  * Update the by-block entry to start later|be shorter.
  532                  */
  533                 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
  534                         return error;
  535         }
  536         if (nfbno2 != NULLAGBLOCK) {
  537                 /*
  538                  * 2 resulting free entries, need to add one.
  539                  */
  540                 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
  541                         return error;
  542                 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
  543                 if ((error = xfs_btree_insert(bno_cur, &i)))
  544                         return error;
  545                 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
  546         }
  547         return 0;
  548 }
 549 
      /*
       * Structure verifier for AGFL buffers.  Returns NULL when the buffer
       * passes, or the address of the failing check for diagnostics.
       */
  550 static xfs_failaddr_t
  551 xfs_agfl_verify(
  552         struct xfs_buf  *bp)
  553 {
  554         struct xfs_mount *mp = bp->b_mount;
  555         struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
  556         int             i;
  557 
  558         /*
  559          * There is no verification of non-crc AGFLs because mkfs does not
  560          * initialise the AGFL to zero or NULL. Hence the only valid part of the
  561          * AGFL is what the AGF says is active. We can't get to the AGF, so we
  562          * can't verify just those entries are valid.
  563          */
  564         if (!xfs_sb_version_hascrc(&mp->m_sb))
  565                 return NULL;
  566 
  567         if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
  568                 return __this_address;
  569         if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
  570                 return __this_address;
  571         /*
  572          * during growfs operations, the perag is not fully initialised,
  573          * so we can't use it for any useful checking. growfs ensures we can't
  574          * use it by using uncached buffers that don't have the perag attached
  575          * so we can detect and avoid this problem.
  576          */
  577         if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
  578                 return __this_address;
  579 
              /* every slot must be unused (NULLAGBLOCK) or point inside the AG */
  580         for (i = 0; i < xfs_agfl_size(mp); i++) {
  581                 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
  582                     be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
  583                         return __this_address;
  584         }
  585 
              /* NOTE(review): presumably rejects an LSN ahead of the log head -- confirm */
  586         if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
  587                 return __this_address;
  588         return NULL;
  589 }
 590 
 591 static void
 592 xfs_agfl_read_verify(
 593         struct xfs_buf  *bp)
 594 {
 595         struct xfs_mount *mp = bp->b_mount;
 596         xfs_failaddr_t  fa;
 597 
 598         /*
 599          * There is no verification of non-crc AGFLs because mkfs does not
 600          * initialise the AGFL to zero or NULL. Hence the only valid part of the
 601          * AGFL is what the AGF says is active. We can't get to the AGF, so we
 602          * can't verify just those entries are valid.
 603          */
 604         if (!xfs_sb_version_hascrc(&mp->m_sb))
 605                 return;
 606 
 607         if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
 608                 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
 609         else {
 610                 fa = xfs_agfl_verify(bp);
 611                 if (fa)
 612                         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
 613         }
 614 }
 615 
 616 static void
 617 xfs_agfl_write_verify(
 618         struct xfs_buf  *bp)
 619 {
 620         struct xfs_mount        *mp = bp->b_mount;
 621         struct xfs_buf_log_item *bip = bp->b_log_item;
 622         xfs_failaddr_t          fa;
 623 
 624         /* no verification of non-crc AGFLs */
 625         if (!xfs_sb_version_hascrc(&mp->m_sb))
 626                 return;
 627 
 628         fa = xfs_agfl_verify(bp);
 629         if (fa) {
 630                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
 631                 return;
 632         }
 633 
 634         if (bip)
 635                 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 636 
 637         xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
 638 }
 639 
      /*
       * Buffer ops for AGFL blocks.  Both magic slots carry the same value
       * (v4 and v5 AGFLs share XFS_AGFL_MAGIC); verification is routed
       * through the xfs_agfl_*_verify() functions above.
       */
  640 const struct xfs_buf_ops xfs_agfl_buf_ops = {
  641         .name = "xfs_agfl",
  642         .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
  643         .verify_read = xfs_agfl_read_verify,
  644         .verify_write = xfs_agfl_write_verify,
  645         .verify_struct = xfs_agfl_verify,
  646 };
 647 
 648 /*
 649  * Read in the allocation group free block array.
 650  */
 651 int                                     /* error */
 652 xfs_alloc_read_agfl(
 653         xfs_mount_t     *mp,            /* mount point structure */
 654         xfs_trans_t     *tp,            /* transaction pointer */
 655         xfs_agnumber_t  agno,           /* allocation group number */
 656         xfs_buf_t       **bpp)          /* buffer for the ag free block array */
 657 {
 658         xfs_buf_t       *bp;            /* return value */
 659         int             error;
 660 
 661         ASSERT(agno != NULLAGNUMBER);
 662         error = xfs_trans_read_buf(
 663                         mp, tp, mp->m_ddev_targp,
 664                         XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
 665                         XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
 666         if (error)
 667                 return error;
 668         xfs_buf_set_ref(bp, XFS_AGFL_REF);
 669         *bpp = bp;
 670         return 0;
 671 }
 672 
 673 STATIC int
 674 xfs_alloc_update_counters(
 675         struct xfs_trans        *tp,
 676         struct xfs_perag        *pag,
 677         struct xfs_buf          *agbp,
 678         long                    len)
 679 {
 680         struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
 681 
 682         pag->pagf_freeblks += len;
 683         be32_add_cpu(&agf->agf_freeblks, len);
 684 
 685         xfs_trans_agblocks_delta(tp, len);
 686         if (unlikely(be32_to_cpu(agf->agf_freeblks) >
 687                      be32_to_cpu(agf->agf_length)))
 688                 return -EFSCORRUPTED;
 689 
 690         xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
 691         return 0;
 692 }
 693 
 694 /*
 695  * Allocation group level functions.
 696  */
 697 
 698 /*
 699  * Deal with the case where only small freespaces remain. Either return the
 700  * contents of the last freespace record, or allocate space from the freelist if
 701  * there is nothing in the tree.
 702  */
 703 STATIC int                      /* error */
 704 xfs_alloc_ag_vextent_small(
 705         struct xfs_alloc_arg    *args,  /* allocation argument structure */
 706         struct xfs_btree_cur    *ccur,  /* optional by-size cursor */
 707         xfs_agblock_t           *fbnop, /* result block number */
 708         xfs_extlen_t            *flenp, /* result length */
 709         int                     *stat)  /* status: 0-freelist, 1-normal/none */
 710 {
 711         int                     error = 0;
 712         xfs_agblock_t           fbno = NULLAGBLOCK;
 713         xfs_extlen_t            flen = 0;
 714         int                     i = 0;
 715 
 716         /*
 717          * If a cntbt cursor is provided, try to allocate the largest record in
 718          * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
 719          * allocation. Make sure to respect minleft even when pulling from the
 720          * freelist.
 721          */
 722         if (ccur)
 723                 error = xfs_btree_decrement(ccur, 0, &i);
 724         if (error)
 725                 goto error;
 726         if (i) {
 727                 error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
 728                 if (error)
 729                         goto error;
 730                 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error);
 731                 goto out;
 732         }
 733 
             /*
              * No usable cntbt record.  The AGFL only ever supplies a single
              * unaligned block, so bail out unless the request is for exactly
              * one block with no alignment constraint.  Also refuse when the
              * allocation is on behalf of the AGFL itself, or when taking a
              * block would leave fewer than minleft blocks on the freelist.
              */
 734         if (args->minlen != 1 || args->alignment != 1 ||
 735             args->resv == XFS_AG_RESV_AGFL ||
 736             (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <=
 737              args->minleft))
 738                 goto out;
 739 
 740         error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
 741         if (error)
 742                 goto error;
 743         if (fbno == NULLAGBLOCK)
 744                 goto out;
 745 
             /* The block may still be on the busy list; apply the reuse rules. */
 746         xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
 747                               xfs_alloc_allow_busy_reuse(args->datatype));
 748 
             /*
              * For userdata allocations, invalidate any buffer caching this
              * block so stale cached contents cannot be written back over the
              * new owner's data.
              */
 749         if (xfs_alloc_is_userdata(args->datatype)) {
 750                 struct xfs_buf  *bp;
 751 
 752                 bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno);
 753                 if (!bp) {
 754                         error = -EFSCORRUPTED;
 755                         goto error;
 756                 }
 757                 xfs_trans_binval(args->tp, bp);
 758         }
             /* Success from the freelist: hand back the single block. */
 759         *fbnop = args->agbno = fbno;
 760         *flenp = args->len = 1;
 761         XFS_WANT_CORRUPTED_GOTO(args->mp,
 762                 fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
 763                 error);
 764         args->wasfromfl = 1;
 765         trace_xfs_alloc_small_freelist(args);
 766 
 767         /*
 768          * If we're feeding an AGFL block to something that doesn't live in the
 769          * free space, we need to clear out the OWN_AG rmap.
 770          */
 771         error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
 772                               &XFS_RMAP_OINFO_AG);
 773         if (error)
 774                 goto error;
 775 
 776         *stat = 0;
 777         return 0;
 778 
 779 out:
 780         /*
 781          * Can't do the allocation, give up.
 782          */
 783         if (flen < args->minlen) {
 784                 args->agbno = NULLAGBLOCK;
 785                 trace_xfs_alloc_small_notenough(args);
 786                 flen = 0;
 787         }
 788         *fbnop = fbno;
 789         *flenp = flen;
 790         *stat = 1;
 791         trace_xfs_alloc_small_done(args);
 792         return 0;
 793 
 794 error:
 795         trace_xfs_alloc_small_error(args);
 796         return error;
 797 }
 798 
 799 /*
 800  * Allocate a variable extent in the allocation group agno.
 801  * Type and bno are used to determine where in the allocation group the
 802  * extent will start.
 803  * Extent's length (returned in *len) will be between minlen and maxlen,
 804  * and of the form k * prod + mod unless there's nothing that large.
 805  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 806  */
 807 STATIC int                      /* error */
 808 xfs_alloc_ag_vextent(
 809         xfs_alloc_arg_t *args)  /* argument structure for allocation */
 810 {
 811         int             error=0;
 812 
 813         ASSERT(args->minlen > 0);
 814         ASSERT(args->maxlen > 0);
 815         ASSERT(args->minlen <= args->maxlen);
 816         ASSERT(args->mod < args->prod);
 817         ASSERT(args->alignment > 0);
 818 
 819         /*
 820          * Branch to correct routine based on the type.
 821          */
 822         args->wasfromfl = 0;
 823         switch (args->type) {
 824         case XFS_ALLOCTYPE_THIS_AG:
 825                 error = xfs_alloc_ag_vextent_size(args);
 826                 break;
 827         case XFS_ALLOCTYPE_NEAR_BNO:
 828                 error = xfs_alloc_ag_vextent_near(args);
 829                 break;
 830         case XFS_ALLOCTYPE_THIS_BNO:
 831                 error = xfs_alloc_ag_vextent_exact(args);
 832                 break;
 833         default:
 834                 ASSERT(0);
 835                 /* NOTREACHED */
 836         }
 837 
             /* Done if the allocation failed outright or found nothing. */
 838         if (error || args->agbno == NULLAGBLOCK)
 839                 return error;
 840 
 841         ASSERT(args->len >= args->minlen);
 842         ASSERT(args->len <= args->maxlen);
 843         ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
 844         ASSERT(args->agbno % args->alignment == 0);
 845 
 846         /* if not file data, insert new block into the reverse map btree */
 847         if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
 848                 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
 849                                        args->agbno, args->len, &args->oinfo);
 850                 if (error)
 851                         return error;
 852         }
 853 
             /*
              * Only adjust the AG free-block counters when the extent came out
              * of the free-space btrees, not off the AGFL (wasfromfl).
              */
 854         if (!args->wasfromfl) {
 855                 error = xfs_alloc_update_counters(args->tp, args->pag,
 856                                                   args->agbp,
 857                                                   -((long)(args->len)));
 858                 if (error)
 859                         return error;
 860 
 861                 ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
 862                                               args->agbno, args->len));
 863         }
 864 
             /* Charge the extent against the chosen AG reservation, then stats. */
 865         xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
 866 
 867         XFS_STATS_INC(args->mp, xs_allocx);
 868         XFS_STATS_ADD(args->mp, xs_allocb, args->len);
 869         return error;
 870 }
 871 
 872 /*
 873  * Allocate a variable extent at exactly agno/bno.
 874  * Extent's length (returned in *len) will be between minlen and maxlen,
 875  * and of the form k * prod + mod unless there's nothing that large.
 876  * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 877  */
 878 STATIC int                      /* error */
 879 xfs_alloc_ag_vextent_exact(
 880         xfs_alloc_arg_t *args)  /* allocation argument structure */
 881 {
 882         xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
 883         xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
 884         int             error;
 885         xfs_agblock_t   fbno;   /* start block of found extent */
 886         xfs_extlen_t    flen;   /* length of found extent */
 887         xfs_agblock_t   tbno;   /* start block of busy-trimmed extent */
 888         xfs_extlen_t    tlen;   /* length of busy-trimmed extent */
 889         xfs_agblock_t   tend;   /* end block of busy-trimmed extent */
 890         int             i;      /* success/failure of operation */
 891         unsigned        busy_gen;       /* busy state generation from trim */
 892 
 893         ASSERT(args->alignment == 1);
 894 
 895         /*
 896          * Allocate/initialize a cursor for the by-number freespace btree.
 897          */
 898         bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 899                                           args->agno, XFS_BTNUM_BNO);
 900 
 901         /*
 902          * Lookup bno and minlen in the btree (minlen is irrelevant, really).
 903          * Look for the closest free block <= bno, it must contain bno
 904          * if any free block does.
 905          */
 906         error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
 907         if (error)
 908                 goto error0;
 909         if (!i)
 910                 goto not_found;
 911 
 912         /*
 913          * Grab the freespace record.
 914          */
 915         error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
 916         if (error)
 917                 goto error0;
 918         XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 919         ASSERT(fbno <= args->agbno);
 920 
 921         /*
 922          * Trim away any part of the extent overlapping busy extents;
 923          * [tbno, tbno + tlen) is the usable (unbusy) portion.
 924          */
 925         tbno = fbno;
 926         tlen = flen;
 927         xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
 928 
 929         /*
 930          * Give up if the start of the extent is busy, or the freespace isn't
 931          * long enough for the minimum request.
 932          */
 933         if (tbno > args->agbno)
 934                 goto not_found;
 935         if (tlen < args->minlen)
 936                 goto not_found;
 937         tend = tbno + tlen;
 938         if (tend < args->agbno + args->minlen)
 939                 goto not_found;
 940 
 941         /*
 942          * End of extent will be smaller of the freespace end and the
 943          * maximal requested end.
 944          *
 945          * Fix the length according to mod and prod if given.
 946          */
 947         args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
 948                                                 - args->agbno;
 949         xfs_alloc_fix_len(args);
 950         ASSERT(args->agbno + args->len <= tend);
 951 
 952         /*
 953          * We are allocating [agbno, agbno + args->len).
 954          * Allocate/initialize a cursor for the by-size btree.
 955          */
 956         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 957                 args->agno, XFS_BTNUM_CNT);
 958         ASSERT(args->agbno + args->len <=
 959                 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 960         error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
 961                                       args->len, XFSA_FIXUP_BNO_OK);
 962         if (error) {
 963                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 964                 goto error0;
 965         }
 966 
 967         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 968         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 969 
 970         args->wasfromfl = 0;
 971         trace_xfs_alloc_exact_done(args);
 972         return 0;
 973 
 974 not_found:
 975         /* Didn't find it, return null. */
 976         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 977         args->agbno = NULLAGBLOCK;
 978         trace_xfs_alloc_exact_notfound(args);
 979         return 0;
 980 
 981 error0:
 982         xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 983         trace_xfs_alloc_exact_error(args);
 984         return error;
 985 }
 985 
 986 /*
 987  * Search the btree in a given direction via the search cursor and compare
 988  * the records found against the good extent we've already found.
 989  */
 990 STATIC int
 991 xfs_alloc_find_best_extent(
 992         struct xfs_alloc_arg    *args,  /* allocation argument structure */
 993         struct xfs_btree_cur    **gcur, /* good cursor */
 994         struct xfs_btree_cur    **scur, /* searching cursor */
 995         xfs_agblock_t           gdiff,  /* difference for search comparison */
 996         xfs_agblock_t           *sbno,  /* extent found by search */
 997         xfs_extlen_t            *slen,  /* extent length */
 998         xfs_agblock_t           *sbnoa, /* aligned extent found by search */
 999         xfs_extlen_t            *slena, /* aligned extent length */
1000         int                     dir)    /* 0 = search right, 1 = search left */
1001 {
1002         xfs_agblock_t           new;
1003         xfs_agblock_t           sdiff;
1004         int                     error;
1005         int                     i;
1006         unsigned                busy_gen;
1007 
     /*
      * On success, exactly one of *gcur/*scur is freed and set to NULL:
      * *scur when the already-found ("good") extent wins, *gcur when the
      * search finds something strictly closer.  The caller continues with
      * the surviving cursor.  On error, both cursors are left for the
      * caller to clean up.
      */
1008         /* The good extent is perfect, no need to search. */
1009         if (!gdiff)
1010                 goto out_use_good;
1011 
1012         /*
1013          * Look until we find a better one, run out of space or run off the end.
1014          */
1015         do {
1016                 error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
1017                 if (error)
1018                         goto error0;
1019                 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1020                 xfs_alloc_compute_aligned(args, *sbno, *slen,
1021                                 sbnoa, slena, &busy_gen);
1022 
1023                 /*
1024                  * Stop if this extent is outside the allowed agbno range, or
1025                  * if the good extent is already at least as close.
1026                  */
1027                 if (!dir) {
1028                         if (*sbnoa > args->max_agbno)
1029                                 goto out_use_good;
1030                         if (*sbnoa >= args->agbno + gdiff)
1031                                 goto out_use_good;
1032                 } else {
1033                         if (*sbnoa < args->min_agbno)
1034                                 goto out_use_good;
1035                         if (*sbnoa <= args->agbno - gdiff)
1036                                 goto out_use_good;
1037                 }
1038 
1039                 /*
1040                  * This extent may be closer; if it is long enough to use,
1041                  * compute its adjusted distance and pick the winner.
1042                  */
1043                 if (*slena >= args->minlen) {
1044                         args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
1045                         xfs_alloc_fix_len(args);
1046 
1047                         sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1048                                                        args->alignment,
1049                                                        args->datatype, *sbnoa,
1050                                                        *slena, &new);
1051 
1052                         /*
1053                          * Choose closer size and invalidate other cursor.
1054                          */
1055                         if (sdiff < gdiff)
1056                                 goto out_use_search;
1057                         goto out_use_good;
1058                 }
1059 
1060                 if (!dir)
1061                         error = xfs_btree_increment(*scur, 0, &i);
1062                 else
1063                         error = xfs_btree_decrement(*scur, 0, &i);
1064                 if (error)
1065                         goto error0;
1066         } while (i);
1067 
1068 out_use_good:
1069         xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
1070         *scur = NULL;
1071         return 0;
1072 
1073 out_use_search:
1074         xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
1075         *gcur = NULL;
1076         return 0;
1077 
1078 error0:
1079         /* caller invalidates cursors */
1080         return error;
1081 }
1080 
1081 /*
1082  * Allocate a variable extent near bno in the allocation group agno.
1083  * Extent's length (returned in len) will be between minlen and maxlen,
1084  * and of the form k * prod + mod unless there's nothing that large.
1085  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1086  */
1087 STATIC int                              /* error */
1088 xfs_alloc_ag_vextent_near(
1089         xfs_alloc_arg_t *args)          /* allocation argument structure */
1090 {
1091         xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
1092         xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
1093         xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
1094         xfs_agblock_t   gtbno;          /* start bno of right side entry */
1095         xfs_agblock_t   gtbnoa;         /* aligned ... */
1096         xfs_extlen_t    gtdiff;         /* difference to right side entry */
1097         xfs_extlen_t    gtlen;          /* length of right side entry */
1098         xfs_extlen_t    gtlena;         /* aligned ... */
1099         xfs_agblock_t   gtnew;          /* useful start bno of right side */
1100         int             error;          /* error code */
1101         int             i;              /* result code, temporary */
1102         int             j;              /* result code, temporary */
1103         xfs_agblock_t   ltbno;          /* start bno of left side entry */
1104         xfs_agblock_t   ltbnoa;         /* aligned ... */
1105         xfs_extlen_t    ltdiff;         /* difference to left side entry */
1106         xfs_extlen_t    ltlen;          /* length of left side entry */
1107         xfs_extlen_t    ltlena;         /* aligned ... */
1108         xfs_agblock_t   ltnew;          /* useful start bno of left side */
1109         xfs_extlen_t    rlen;           /* length of returned extent */
1110         bool            busy;           /* set if any searched extent was busy */
1111         unsigned        busy_gen;       /* busy state generation, for the flush */
1112 #ifdef DEBUG
1113         /*
1114          * Randomly don't execute the first algorithm.
1115          */
1116         int             dofirst;        /* set to do first algorithm */
1117 
1118         dofirst = prandom_u32() & 1;
1119 #endif
1120 
1121         /* handle uninitialized agbno range so caller doesn't have to */
1122         if (!args->min_agbno && !args->max_agbno)
1123                 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
1124         ASSERT(args->min_agbno <= args->max_agbno);
1125 
1126         /* clamp agbno to the range if it's outside */
1127         if (args->agbno < args->min_agbno)
1128                 args->agbno = args->min_agbno;
1129         if (args->agbno > args->max_agbno)
1130                 args->agbno = args->max_agbno;
1131 
     /* Restart point: re-entered with fresh cursors after a busy-list flush. */
1132 restart:
1133         bno_cur_lt = NULL;
1134         bno_cur_gt = NULL;
1135         ltlen = 0;
1136         gtlena = 0;
1137         ltlena = 0;
1138         busy = false;
1139 
1140         /*
1141          * Get a cursor for the by-size btree.
1142          */
1143         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1144                 args->agno, XFS_BTNUM_CNT);
1145 
1146         /*
1147          * See if there are any free extents as big as maxlen.
1148          */
1149         if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
1150                 goto error0;
1151         /*
1152          * If none, then pick up the last entry in the tree unless the
1153          * tree is empty.
1154          */
1155         if (!i) {
1156                 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
1157                                 &ltlen, &i)))
1158                         goto error0;
1159                 if (i == 0 || ltlen == 0) {
1160                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1161                         trace_xfs_alloc_near_noentry(args);
1162                         return 0;
1163                 }
1164                 ASSERT(i == 1);
1165         }
1166         args->wasfromfl = 0;
1167 
1168         /*
1169          * First algorithm.
1170          * If the requested extent is large wrt the freespaces available
1171          * in this a.g., then the cursor will be pointing to a btree entry
1172          * near the right edge of the tree.  If it's in the last btree leaf
1173          * block, then we just examine all the entries in that block
1174          * that are big enough, and pick the best one.
1175          * This is written as a while loop so we can break out of it,
1176          * but we never loop back to the top.
1177          */
1178         while (xfs_btree_islastblock(cnt_cur, 0)) {
1179                 xfs_extlen_t    bdiff;
1180                 int             besti=0;
1181                 xfs_extlen_t    blen=0;
1182                 xfs_agblock_t   bnew=0;
1183 
1184 #ifdef DEBUG
1185                 if (dofirst)
1186                         break;
1187 #endif
1188                 /*
1189                  * Start from the entry that lookup found, sequence through
1190                  * all larger free blocks.  If we're actually pointing at a
1191                  * record smaller than maxlen, go to the start of this block,
1192                  * and skip all those smaller than minlen.
1193                  */
1194                 if (ltlen || args->alignment > 1) {
1195                         cnt_cur->bc_ptrs[0] = 1;
1196                         do {
1197                                 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
1198                                                 &ltlen, &i)))
1199                                         goto error0;
1200                                 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1201                                 if (ltlen >= args->minlen)
1202                                         break;
1203                                 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
1204                                         goto error0;
1205                         } while (i);
                             /*
                              * NOTE(review): this ASSERT precedes the !i check,
                              * so it also fires when the loop ran off the end
                              * (i == 0) with ltlen < minlen — verify ordering.
                              */
1206                         ASSERT(ltlen >= args->minlen);
1207                         if (!i)
1208                                 break;
1209                 }
1210                 i = cnt_cur->bc_ptrs[0];
1211                 for (j = 1, blen = 0, bdiff = 0;
1212                      !error && j && (blen < args->maxlen || bdiff > 0);
1213                      error = xfs_btree_increment(cnt_cur, 0, &j)) {
1214                         /*
1215                          * For each entry, decide if it's better than
1216                          * the previous best entry.
1217                          */
1218                         if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1219                                 goto error0;
1220                         XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1221                         busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
1222                                         &ltbnoa, &ltlena, &busy_gen);
1223                         if (ltlena < args->minlen)
1224                                 continue;
1225                         if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1226                                 continue;
1227                         args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1228                         xfs_alloc_fix_len(args);
1229                         ASSERT(args->len >= args->minlen);
1230                         if (args->len < blen)
1231                                 continue;
1232                         ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1233                                 args->alignment, args->datatype, ltbnoa,
1234                                 ltlena, &ltnew);
1235                         if (ltnew != NULLAGBLOCK &&
1236                             (args->len > blen || ltdiff < bdiff)) {
1237                                 bdiff = ltdiff;
1238                                 bnew = ltnew;
1239                                 blen = args->len;
1240                                 besti = cnt_cur->bc_ptrs[0];
1241                         }
1242                 }
1243                 /*
1244                  * It didn't work.  We COULD be in a case where
1245                  * there's a good record somewhere, so try again.
1246                  */
1247                 if (blen == 0)
1248                         break;
1249                 /*
1250                  * Point at the best entry, and retrieve it again.
1251                  */
1252                 cnt_cur->bc_ptrs[0] = besti;
1253                 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1254                         goto error0;
1255                 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1256                 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1257                 args->len = blen;
1258 
1259                 /*
1260                  * We are allocating starting at bnew for blen blocks.
1261                  */
1262                 args->agbno = bnew;
1263                 ASSERT(bnew >= ltbno);
1264                 ASSERT(bnew + blen <= ltbno + ltlen);
1265                 /*
1266                  * Set up a cursor for the by-bno tree.
1267                  */
1268                 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1269                         args->agbp, args->agno, XFS_BTNUM_BNO);
1270                 /*
1271                  * Fix up the btree entries.
1272                  */
1273                 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1274                                 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
1275                         goto error0;
1276                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1277                 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1278 
1279                 trace_xfs_alloc_near_first(args);
1280                 return 0;
1281         }
1282         /*
1283          * Second algorithm.
1284          * Search in the by-bno tree to the left and to the right
1285          * simultaneously, until in each case we find a space big enough,
1286          * or run into the edge of the tree.  When we run into the edge,
1287          * we deallocate that cursor.
1288          * If both searches succeed, we compare the two spaces and pick
1289          * the better one.
1290          * With alignment, it's possible for both to fail; the upper
1291          * level algorithm that picks allocation groups for allocations
1292          * is not supposed to do this.
1293          */
1294         /*
1295          * Allocate and initialize the cursor for the leftward search.
1296          */
1297         bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1298                 args->agno, XFS_BTNUM_BNO);
1299         /*
1300          * Lookup <= bno to find the leftward search's starting point.
1301          */
1302         if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
1303                 goto error0;
1304         if (!i) {
1305                 /*
1306                  * Didn't find anything; use this cursor for the rightward
1307                  * search.
1308                  */
1309                 bno_cur_gt = bno_cur_lt;
1310                 bno_cur_lt = NULL;
1311         }
1312         /*
1313          * Found something.  Duplicate the cursor for the rightward search.
1314          */
1315         else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
1316                 goto error0;
1317         /*
1318          * Increment the cursor, so we will point at the entry just right
1319          * of the leftward entry if any, or to the leftmost entry.
1320          */
1321         if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1322                 goto error0;
1323         if (!i) {
1324                 /*
1325                  * It failed, there are no rightward entries.
1326                  */
1327                 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1328                 bno_cur_gt = NULL;
1329         }
1330         /*
1331          * Loop going left with the leftward cursor, right with the
1332          * rightward cursor, until either both directions give up or
1333          * we find an entry at least as big as minlen.
1334          */
1335         do {
1336                 if (bno_cur_lt) {
1337                         if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1338                                 goto error0;
1339                         XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1340                         busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
1341                                         &ltbnoa, &ltlena, &busy_gen);
1342                         if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
1343                                 break;
1344                         if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1345                                 goto error0;
1346                         if (!i || ltbnoa < args->min_agbno) {
1347                                 xfs_btree_del_cursor(bno_cur_lt,
1348                                                      XFS_BTREE_NOERROR);
1349                                 bno_cur_lt = NULL;
1350                         }
1351                 }
1352                 if (bno_cur_gt) {
1353                         if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1354                                 goto error0;
1355                         XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1356                         busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
1357                                         &gtbnoa, &gtlena, &busy_gen);
1358                         if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
1359                                 break;
1360                         if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1361                                 goto error0;
1362                         if (!i || gtbnoa > args->max_agbno) {
1363                                 xfs_btree_del_cursor(bno_cur_gt,
1364                                                      XFS_BTREE_NOERROR);
1365                                 bno_cur_gt = NULL;
1366                         }
1367                 }
1368         } while (bno_cur_lt || bno_cur_gt);
1369 
1370         /*
1371          * Got both cursors still active, need to find better entry.
1372          */
1373         if (bno_cur_lt && bno_cur_gt) {
1374                 if (ltlena >= args->minlen) {
1375                         /*
1376                          * Left side is good, look for a right side entry.
1377                          */
1378                         args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1379                         xfs_alloc_fix_len(args);
1380                         ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1381                                 args->alignment, args->datatype, ltbnoa,
1382                                 ltlena, &ltnew);
1383 
1384                         error = xfs_alloc_find_best_extent(args,
1385                                                 &bno_cur_lt, &bno_cur_gt,
1386                                                 ltdiff, &gtbno, &gtlen,
1387                                                 &gtbnoa, &gtlena,
1388                                                 0 /* search right */);
1389                 } else {
1390                         ASSERT(gtlena >= args->minlen);
1391 
1392                         /*
1393                          * Right side is good, look for a left side entry.
1394                          */
1395                         args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1396                         xfs_alloc_fix_len(args);
1397                         gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1398                                 args->alignment, args->datatype, gtbnoa,
1399                                 gtlena, &gtnew);
1400 
1401                         error = xfs_alloc_find_best_extent(args,
1402                                                 &bno_cur_gt, &bno_cur_lt,
1403                                                 gtdiff, &ltbno, &ltlen,
1404                                                 &ltbnoa, &ltlena,
1405                                                 1 /* search left */);
1406                 }
1407 
1408                 if (error)
1409                         goto error0;
1410         }
1411 
1412         /*
1413          * If we couldn't get anything, give up.
1414          */
1415         if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1416                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1417 
                     /*
                      * Nothing usable was found but some extents were busy:
                      * flush the busy list and retry the whole search.
                      */
1418                 if (busy) {
1419                         trace_xfs_alloc_near_busy(args);
1420                         xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
1421                         goto restart;
1422                 }
1423                 trace_xfs_alloc_size_neither(args);
1424                 args->agbno = NULLAGBLOCK;
1425                 return 0;
1426         }
1427 
1428         /*
1429          * At this point we have selected a freespace entry, either to the
1430          * left or to the right.  If it's on the right, copy all the
1431          * useful variables to the "left" set so we only have one
1432          * copy of this code.
1433          */
1434         if (bno_cur_gt) {
1435                 bno_cur_lt = bno_cur_gt;
1436                 bno_cur_gt = NULL;
1437                 ltbno = gtbno;
1438                 ltbnoa = gtbnoa;
1439                 ltlen = gtlen;
1440                 ltlena = gtlena;
1441                 j = 1;  /* remember we took the right side, for tracing below */
1442         } else
1443                 j = 0;
1444 
1445         /*
1446          * Fix up the length and compute the useful address.
1447          */
1448         args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1449         xfs_alloc_fix_len(args);
1450         rlen = args->len;
1451         (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1452                                      args->datatype, ltbnoa, ltlena, &ltnew);
1453         ASSERT(ltnew >= ltbno);
1454         ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1455         ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1456         ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
1457         args->agbno = ltnew;
1458 
1459         if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1460                         ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1461                 goto error0;
1462 
1463         if (j)
1464                 trace_xfs_alloc_near_greater(args);
1465         else
1466                 trace_xfs_alloc_near_lesser(args);
1467 
1468         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1469         xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1470         return 0;
1471 
1472  error0:
1473         trace_xfs_alloc_near_error(args);
1474         if (cnt_cur != NULL)
1475                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1476         if (bno_cur_lt != NULL)
1477                 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1478         if (bno_cur_gt != NULL)
1479                 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1480         return error;
1481 }
1482 
1483 /*
1484  * Allocate a variable extent anywhere in the allocation group agno.
1485  * Extent's length (returned in len) will be between minlen and maxlen,
1486  * and of the form k * prod + mod unless there's nothing that large.
1487  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1488  */
STATIC int				/* error */
xfs_alloc_ag_vextent_size(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
	int		error;		/* error result */
	xfs_agblock_t	fbno;		/* start of found freespace */
	xfs_extlen_t	flen;		/* length of found freespace */
	int		i;		/* temp status variable */
	xfs_agblock_t	rbno;		/* returned block number */
	xfs_extlen_t	rlen;		/* length of returned extent */
	bool		busy;		/* candidate overlapped busy extents */
	unsigned	busy_gen;	/* busy generation, for log forcing */

restart:
	/*
	 * Allocate and initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	bno_cur = NULL;
	busy = false;

	/*
	 * Look for an entry >= maxlen+alignment-1 blocks.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
			args->maxlen + args->alignment - 1, &i)))
		goto error0;

	/*
	 * If none then we have to settle for a smaller extent. In the case that
	 * there are no large extents, this will return the last entry in the
	 * tree unless the tree is empty. In the case that there are only busy
	 * large extents, this will return the largest small extent unless there
	 * are no smaller extents available.
	 */
	if (!i) {
		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
						   &fbno, &flen, &i);
		if (error)
			goto error0;
		if (i == 0 || flen == 0) {
			/* Nothing usable at all; caller sees agbno unset. */
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_noentry(args);
			return 0;
		}
		ASSERT(i == 1);
		/* Trim the found extent to its best aligned subrange. */
		busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
				&rlen, &busy_gen);
	} else {
		/*
		 * Search for a non-busy extent that is large enough.
		 */
		for (;;) {
			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
			if (error)
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);

			busy = xfs_alloc_compute_aligned(args, fbno, flen,
					&rbno, &rlen, &busy_gen);

			if (rlen >= args->maxlen)
				break;

			error = xfs_btree_increment(cnt_cur, 0, &i);
			if (error)
				goto error0;
			if (i == 0) {
				/*
				 * Our only valid extents must have been busy.
				 * Make it unbusy by forcing the log out and
				 * retrying.
				 */
				xfs_btree_del_cursor(cnt_cur,
						     XFS_BTREE_NOERROR);
				trace_xfs_alloc_size_busy(args);
				xfs_extent_busy_flush(args->mp,
							args->pag, busy_gen);
				goto restart;
			}
		}
	}

	/*
	 * In the first case above, we got the last entry in the
	 * by-size btree.  Now we check to see if the space hits maxlen
	 * once aligned; if not, we search left for something better.
	 * This can't happen in the second case above.
	 */
	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
	if (rlen < args->maxlen) {
		xfs_agblock_t	bestfbno;	/* best candidate raw start */
		xfs_extlen_t	bestflen;	/* best candidate raw length */
		xfs_agblock_t	bestrbno;	/* best candidate aligned start */
		xfs_extlen_t	bestrlen;	/* best candidate aligned length */

		bestrlen = rlen;
		bestrbno = rbno;
		bestflen = flen;
		bestfbno = fbno;
		/*
		 * Walk leftwards (toward shorter raw extents) looking for a
		 * candidate whose aligned length beats the current best.
		 * Once raw lengths drop below the best aligned length no
		 * improvement is possible, so stop.
		 */
		for (;;) {
			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
				goto error0;
			if (i == 0)
				break;
			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
					&i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			if (flen < bestrlen)
				break;
			busy = xfs_alloc_compute_aligned(args, fbno, flen,
					&rbno, &rlen, &busy_gen);
			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
			XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
				(rlen <= flen && rbno + rlen <= fbno + flen),
				error0);
			if (rlen > bestrlen) {
				bestrlen = rlen;
				bestrbno = rbno;
				bestflen = flen;
				bestfbno = fbno;
				if (rlen == args->maxlen)
					break;
			}
		}
		/* Reposition the cursor on the record we settled on. */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
				&i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
		rlen = bestrlen;
		rbno = bestrbno;
		flen = bestflen;
		fbno = bestfbno;
	}
	args->wasfromfl = 0;
	/*
	 * Fix up the length.
	 */
	args->len = rlen;
	if (rlen < args->minlen) {
		if (busy) {
			/*
			 * The shortfall may only be due to busy extents;
			 * force the log to clear them and retry the search.
			 */
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_busy(args);
			xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
			goto restart;
		}
		goto out_nominleft;
	}
	xfs_alloc_fix_len(args);

	rlen = args->len;
	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
	/*
	 * Allocate and initialize a cursor for the by-block tree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_BNO);
	/* Remove the allocated range from both freespace btrees. */
	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
			rbno, rlen, XFSA_FIXUP_CNT_OK)))
		goto error0;
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	cnt_cur = bno_cur = NULL;
	args->len = rlen;
	args->agbno = rbno;
	/* The allocation must lie entirely inside the AG. */
	XFS_WANT_CORRUPTED_GOTO(args->mp,
		args->agbno + args->len <=
			be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
		error0);
	trace_xfs_alloc_size_done(args);
	return 0;

error0:
	trace_xfs_alloc_size_error(args);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	return error;

out_nominleft:
	/* Can't satisfy minlen and nothing was busy: give up cleanly. */
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	trace_xfs_alloc_size_nominleft(args);
	args->agbno = NULLAGBLOCK;
	return 0;
}
1681 
1682 /*
1683  * Free the extent starting at agno/bno for length.
1684  */
STATIC int
xfs_free_ag_extent(
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp,
	xfs_agnumber_t			agno,
	xfs_agblock_t			bno,
	xfs_extlen_t			len,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		type)
{
	struct xfs_mount		*mp;
	struct xfs_perag		*pag;
	struct xfs_btree_cur		*bno_cur;
	struct xfs_btree_cur		*cnt_cur;
	xfs_agblock_t			gtbno; /* start of right neighbor */
	xfs_extlen_t			gtlen; /* length of right neighbor */
	xfs_agblock_t			ltbno; /* start of left neighbor */
	xfs_extlen_t			ltlen; /* length of left neighbor */
	xfs_agblock_t			nbno; /* new starting block of freesp */
	xfs_extlen_t			nlen; /* new length of freespace */
	int				haveleft; /* have a left neighbor */
	int				haveright; /* have a right neighbor */
	int				i;
	int				error;

	bno_cur = cnt_cur = NULL;
	mp = tp->t_mountp;

	/* Remove the reverse-mapping record first, unless the owner opts out. */
	if (!xfs_rmap_should_skip_owner_update(oinfo)) {
		error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
		if (error)
			goto error0;
	}

	/*
	 * Allocate and initialize a cursor for the by-block btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
	/*
	 * Look for a neighboring block on the left (lower block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
		goto error0;
	if (haveleft) {
		/*
		 * There is a block to our left.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * It's not contiguous, though.
		 */
		if (ltbno + ltlen < bno)
			haveleft = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			XFS_WANT_CORRUPTED_GOTO(mp,
						ltbno + ltlen <= bno, error0);
		}
	}
	/*
	 * Look for a neighboring block on the right (higher block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
		goto error0;
	if (haveright) {
		/*
		 * There is a block to our right.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * It's not contiguous, though.
		 */
		if (bno + len < gtbno)
			haveright = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
		}
	}
	/*
	 * Now allocate and initialize a cursor for the by-size tree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
	/*
	 * Have both left and right contiguous neighbors.
	 * Merge all three into a single free block.
	 */
	if (haveleft && haveright) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Delete the old by-block entry for the right block.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Move the by-block cursor back to the left neighbor.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
#ifdef DEBUG
		/*
		 * Check that this is the right record: delete didn't
		 * mangle the cursor.
		 */
		{
			xfs_agblock_t	xxbno;
			xfs_extlen_t	xxlen;

			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
					&i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(mp,
				i == 1 && xxbno == ltbno && xxlen == ltlen,
				error0);
		}
#endif
		/*
		 * Update remaining by-block entry to the new, joined block.
		 */
		nbno = ltbno;
		nlen = len + ltlen + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a left contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveleft) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Back up the by-block cursor to the left neighbor, and
		 * update its length.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		nbno = ltbno;
		nlen = len + ltlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a right contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveright) {
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Update the starting block and length of the right
		 * neighbor in the by-block tree.
		 */
		nbno = bno;
		nlen = len + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * No contiguous neighbors.
	 * Insert the new freespace into the by-block tree.
	 */
	else {
		nbno = bno;
		nlen = len;
		if ((error = xfs_btree_insert(bno_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
	}
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	bno_cur = NULL;
	/*
	 * In all cases we need to insert the new freespace in the by-size tree.
	 */
	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
		goto error0;
	/* The merged record must NOT already exist, or the tree is corrupt. */
	XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
	if ((error = xfs_btree_insert(cnt_cur, &i)))
		goto error0;
	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	cnt_cur = NULL;

	/*
	 * Update the freespace totals in the ag and superblock.
	 */
	pag = xfs_perag_get(mp, agno);
	error = xfs_alloc_update_counters(tp, pag, agbp, len);
	xfs_ag_resv_free_extent(pag, type, tp, len);
	xfs_perag_put(pag);
	if (error)
		goto error0;

	XFS_STATS_INC(mp, xs_freex);
	XFS_STATS_ADD(mp, xs_freeb, len);

	trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);

	return 0;

 error0:
	/* -1/-1 in the trace marks the failure path. */
	trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	return error;
}
1941 
1942 /*
1943  * Visible (exported) allocation/free functions.
1944  * Some of these are used just by xfs_alloc_btree.c and this file.
1945  */
1946 
1947 /*
1948  * Compute and fill in value of m_ag_maxlevels.
1949  */
1950 void
1951 xfs_alloc_compute_maxlevels(
1952         xfs_mount_t     *mp)    /* file system mount structure */
1953 {
1954         mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
1955                         (mp->m_sb.sb_agblocks + 1) / 2);
1956 }
1957 
1958 /*
1959  * Find the length of the longest extent in an AG.  The 'need' parameter
1960  * specifies how much space we're going to need for the AGFL and the
1961  * 'reserved' parameter tells us how many blocks in this AG are reserved for
1962  * other callers.
1963  */
1964 xfs_extlen_t
1965 xfs_alloc_longest_free_extent(
1966         struct xfs_perag        *pag,
1967         xfs_extlen_t            need,
1968         xfs_extlen_t            reserved)
1969 {
1970         xfs_extlen_t            delta = 0;
1971 
1972         /*
1973          * If the AGFL needs a recharge, we'll have to subtract that from the
1974          * longest extent.
1975          */
1976         if (need > pag->pagf_flcount)
1977                 delta = need - pag->pagf_flcount;
1978 
1979         /*
1980          * If we cannot maintain others' reservations with space from the
1981          * not-longest freesp extents, we'll have to subtract /that/ from
1982          * the longest extent too.
1983          */
1984         if (pag->pagf_freeblks - pag->pagf_longest < reserved)
1985                 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
1986 
1987         /*
1988          * If the longest extent is long enough to satisfy all the
1989          * reservations and AGFL rules in place, we can return this extent.
1990          */
1991         if (pag->pagf_longest > delta)
1992                 return pag->pagf_longest - delta;
1993 
1994         /* Otherwise, let the caller try for 1 block if there's space. */
1995         return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1996 }
1997 
1998 unsigned int
1999 xfs_alloc_min_freelist(
2000         struct xfs_mount        *mp,
2001         struct xfs_perag        *pag)
2002 {
2003         unsigned int            min_free;
2004 
2005         /* space needed by-bno freespace btree */
2006         min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
2007                                        mp->m_ag_maxlevels);
2008         /* space needed by-size freespace btree */
2009         min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
2010                                        mp->m_ag_maxlevels);
2011         /* space needed reverse mapping used space btree */
2012         if (xfs_sb_version_hasrmapbt(&mp->m_sb))
2013                 min_free += min_t(unsigned int,
2014                                   pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
2015                                   mp->m_rmap_maxlevels);
2016 
2017         return min_free;
2018 }
2019 
2020 /*
2021  * Check if the operation we are fixing up the freelist for should go ahead or
2022  * not. If we are freeing blocks, we always allow it, otherwise the allocation
2023  * is dependent on whether the size and shape of free space available will
2024  * permit the requested allocation to take place.
2025  */
2026 static bool
2027 xfs_alloc_space_available(
2028         struct xfs_alloc_arg    *args,
2029         xfs_extlen_t            min_free,
2030         int                     flags)
2031 {
2032         struct xfs_perag        *pag = args->pag;
2033         xfs_extlen_t            alloc_len, longest;
2034         xfs_extlen_t            reservation; /* blocks that are still reserved */
2035         int                     available;
2036         xfs_extlen_t            agflcount;
2037 
2038         if (flags & XFS_ALLOC_FLAG_FREEING)
2039                 return true;
2040 
2041         reservation = xfs_ag_resv_needed(pag, args->resv);
2042 
2043         /* do we have enough contiguous free space for the allocation? */
2044         alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
2045         longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
2046         if (longest < alloc_len)
2047                 return false;
2048 
2049         /*
2050          * Do we have enough free space remaining for the allocation? Don't
2051          * account extra agfl blocks because we are about to defer free them,
2052          * making them unavailable until the current transaction commits.
2053          */
2054         agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
2055         available = (int)(pag->pagf_freeblks + agflcount -
2056                           reservation - min_free - args->minleft);
2057         if (available < (int)max(args->total, alloc_len))
2058                 return false;
2059 
2060         /*
2061          * Clamp maxlen to the amount of free space available for the actual
2062          * extent allocation.
2063          */
2064         if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2065                 args->maxlen = available;
2066                 ASSERT(args->maxlen > 0);
2067                 ASSERT(args->maxlen >= args->minlen);
2068         }
2069 
2070         return true;
2071 }
2072 
2073 int
2074 xfs_free_agfl_block(
2075         struct xfs_trans        *tp,
2076         xfs_agnumber_t          agno,
2077         xfs_agblock_t           agbno,
2078         struct xfs_buf          *agbp,
2079         struct xfs_owner_info   *oinfo)
2080 {
2081         int                     error;
2082         struct xfs_buf          *bp;
2083 
2084         error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2085                                    XFS_AG_RESV_AGFL);
2086         if (error)
2087                 return error;
2088 
2089         bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno);
2090         if (!bp)
2091                 return -EFSCORRUPTED;
2092         xfs_trans_binval(tp, bp);
2093 
2094         return 0;
2095 }
2096 
2097 /*
2098  * Check the agfl fields of the agf for inconsistency or corruption. The purpose
2099  * is to detect an agfl header padding mismatch between current and early v5
2100  * kernels. This problem manifests as a 1-slot size difference between the
2101  * on-disk flcount and the active [first, last] range of a wrapped agfl. This
2102  * may also catch variants of agfl count corruption unrelated to padding. Either
2103  * way, we'll reset the agfl and warn the user.
2104  *
2105  * Return true if a reset is required before the agfl can be used, false
2106  * otherwise.
2107  */
2108 static bool
2109 xfs_agfl_needs_reset(
2110         struct xfs_mount        *mp,
2111         struct xfs_agf          *agf)
2112 {
2113         uint32_t                f = be32_to_cpu(agf->agf_flfirst);
2114         uint32_t                l = be32_to_cpu(agf->agf_fllast);
2115         uint32_t                c = be32_to_cpu(agf->agf_flcount);
2116         int                     agfl_size = xfs_agfl_size(mp);
2117         int                     active;
2118 
2119         /* no agfl header on v4 supers */
2120         if (!xfs_sb_version_hascrc(&mp->m_sb))
2121                 return false;
2122 
2123         /*
2124          * The agf read verifier catches severe corruption of these fields.
2125          * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2126          * the verifier allows it.
2127          */
2128         if (f >= agfl_size || l >= agfl_size)
2129                 return true;
2130         if (c > agfl_size)
2131                 return true;
2132 
2133         /*
2134          * Check consistency between the on-disk count and the active range. An
2135          * agfl padding mismatch manifests as an inconsistent flcount.
2136          */
2137         if (c && l >= f)
2138                 active = l - f + 1;
2139         else if (c)
2140                 active = agfl_size - f + l + 1;
2141         else
2142                 active = 0;
2143 
2144         return active != c;
2145 }
2146 
2147 /*
2148  * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2149  * agfl content cannot be trusted. Warn the user that a repair is required to
2150  * recover leaked blocks.
2151  *
2152  * The purpose of this mechanism is to handle filesystems affected by the agfl
2153  * header padding mismatch problem. A reset keeps the filesystem online with a
2154  * relatively minor free space accounting inconsistency rather than suffer the
2155  * inevitable crash from use of an invalid agfl block.
2156  */
static void
xfs_agfl_reset(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);

	/* Caller must have flagged the reset (see xfs_agfl_needs_reset). */
	ASSERT(pag->pagf_agflreset);
	trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);

	/* The dropped blocks stay leaked until xfs_repair reclaims them. */
	xfs_warn(mp,
	       "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
	       "Please unmount and run xfs_repair.",
	         pag->pag_agno, pag->pagf_flcount);

	/* Rewrite the on-disk header to an empty list and log the change. */
	agf->agf_flfirst = 0;
	agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
	agf->agf_flcount = 0;
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
				    XFS_AGF_FLCOUNT);

	/* Keep the in-core perag in sync and clear the pending-reset flag. */
	pag->pagf_flcount = 0;
	pag->pagf_agflreset = false;
}
2183 
2184 /*
2185  * Defer an AGFL block free. This is effectively equivalent to
2186  * xfs_bmap_add_free() with some special handling particular to AGFL blocks.
2187  *
2188  * Deferring AGFL frees helps prevent log reservation overruns due to too many
2189  * allocation operations in a transaction. AGFL frees are prone to this problem
2190  * because for one they are always freed one at a time. Further, an immediate
2191  * AGFL block free can cause a btree join and require another block free before
2192  * the real allocation can proceed. Deferring the free disconnects freeing up
2193  * the AGFL slot from freeing the block.
2194  */
STATIC void
xfs_defer_agfl_block(
	struct xfs_trans		*tp,
	xfs_agnumber_t			agno,
	xfs_fsblock_t			agbno,
	struct xfs_owner_info		*oinfo)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_extent_free_item	*new;		/* new element */

	ASSERT(xfs_bmap_free_item_zone != NULL);
	ASSERT(oinfo != NULL);

	/* Build a single-block free work item for the deferred ops code. */
	new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0);
	new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
	new->xefi_blockcount = 1;
	new->xefi_oinfo = *oinfo;

	trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);

	/* Queue the item; ownership of 'new' passes to the deferred op. */
	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
}
2217 
2218 /*
2219  * Decide whether to use this allocation group for this allocation.
2220  * If so, fix up the btree freelist's size.
2221  */
int			/* error */
xfs_alloc_fix_freelist(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	int			flags)	/* XFS_ALLOC_FLAG_... */
{
	struct xfs_mount	*mp = args->mp;
	struct xfs_perag	*pag = args->pag;
	struct xfs_trans	*tp = args->tp;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_alloc_arg	targs;	/* local allocation arguments */
	xfs_agblock_t		bno;	/* freelist block */
	xfs_extlen_t		need;	/* total blocks needed in freelist */
	int			error = 0;

	/* deferred ops (AGFL block frees) require permanent transactions */
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);

	/* Read the AGF if the perag data hasn't been initialised yet. */
	if (!pag->pagf_init) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error)
			goto out_no_agbp;
		if (!pag->pagf_init) {
			/* still uninitialised: trylock missed the AGF lock */
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			goto out_agbp_relse;
		}
	}

	/*
	 * If this is a metadata preferred pag and we are user data then try
	 * somewhere else if we are not being asked to try harder at this
	 * point
	 */
	if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) &&
	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
		goto out_agbp_relse;
	}

	/*
	 * Quick space check against the (possibly stale) perag counters
	 * before we commit to locking the AGF buffer below.
	 */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags |
			XFS_ALLOC_FLAG_CHECK))
		goto out_agbp_relse;

	/*
	 * Get the a.g. freespace buffer.
	 * Can fail if we're not blocking on locks, and it's held.
	 */
	if (!agbp) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error)
			goto out_no_agbp;
		if (!agbp) {
			/* NULL buffer without error means trylock failure */
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			goto out_no_agbp;
		}
	}

	/* reset a padding mismatched agfl before final free space check */
	if (pag->pagf_agflreset)
		xfs_agfl_reset(tp, agbp, pag);

	/*
	 * Re-check the space now that we hold the AGF: if there isn't
	 * enough total or contiguous free space, reject this AG.
	 */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags))
		goto out_agbp_relse;

	/*
	 * Make the freelist shorter if it's too long.
	 *
	 * Note that from this point onwards, we will always release the agf and
	 * agfl buffers on error. This handles the case where we error out and
	 * the buffers are clean or may not have been joined to the transaction
	 * and hence need to be released manually. If they have been joined to
	 * the transaction, then xfs_trans_brelse() will handle them
	 * appropriately based on the recursion count and dirty state of the
	 * buffer.
	 *
	 * XXX (dgc): When we have lots of free space, does this buy us
	 * anything other than extra overhead when we need to put more blocks
	 * back on the free list? Maybe we should only do this when space is
	 * getting low or the AGFL is more than half full?
	 *
	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
	 * updating the rmapbt.  Both flags are used in xfs_repair while we're
	 * rebuilding the rmapbt, and neither are used by the kernel.  They're
	 * both required to ensure that rmaps are correctly recorded for the
	 * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
	 * repair/rmap.c in xfsprogs for details.
	 */
	memset(&targs, 0, sizeof(targs));
	/* struct copy below */
	if (flags & XFS_ALLOC_FLAG_NORMAP)
		targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
	else
		targs.oinfo = XFS_RMAP_OINFO_AG;
	while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
		if (error)
			goto out_agbp_relse;

		/* defer agfl frees */
		xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
	}

	/* Set up a THIS_AG allocation request for refilling the freelist. */
	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.alignment = targs.minlen = targs.prod = 1;
	targs.type = XFS_ALLOCTYPE_THIS_AG;
	targs.pag = pag;
	error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
	if (error)
		goto out_agbp_relse;

	/* Make the freelist longer if it's too short. */
	while (pag->pagf_flcount < need) {
		targs.agbno = 0;
		targs.maxlen = need - pag->pagf_flcount;
		targs.resv = XFS_AG_RESV_AGFL;

		/* Allocate as many blocks as possible at once. */
		error = xfs_alloc_ag_vextent(&targs);
		if (error)
			goto out_agflbp_relse;

		/*
		 * Stop if we run out.  Won't happen if callers are obeying
		 * the restrictions correctly.  Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (flags & XFS_ALLOC_FLAG_FREEING)
				break;
			goto out_agflbp_relse;
		}
		/*
		 * Put each allocated block on the list.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(tp, agbp,
							agflbp, bno, 0);
			if (error)
				goto out_agflbp_relse;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;

out_agflbp_relse:
	xfs_trans_brelse(tp, agflbp);
out_agbp_relse:
	if (agbp)
		xfs_trans_brelse(tp, agbp);
out_no_agbp:
	args->agbp = NULL;
	return error;
}
2385 
2386 /*
2387  * Get a block from the freelist.
2388  * Returns with the buffer for the block gotten.
2389  */
int				/* error */
xfs_alloc_get_freelist(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
	int		btreeblk) /* destination is a AGF btree */
{
	xfs_agf_t	*agf;	/* a.g. freespace structure */
	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
	xfs_agblock_t	bno;	/* block number returned */
	__be32		*agfl_bno;
	int		error;
	int		logflags;
	xfs_mount_t	*mp = tp->t_mountp;
	xfs_perag_t	*pag;	/* per allocation group data */

	/*
	 * Freelist is empty, give up.  *bnop = NULLAGBLOCK signals the
	 * caller that no block was available; this is not an error.
	 */
	agf = XFS_BUF_TO_AGF(agbp);
	if (!agf->agf_flcount) {
		*bnop = NULLAGBLOCK;
		return 0;
	}
	/*
	 * Read the array of free blocks.
	 */
	error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
				    &agflbp);
	if (error)
		return error;


	/*
	 * Get the block number and update the data structures.
	 */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
	be32_add_cpu(&agf->agf_flfirst, 1);
	xfs_trans_brelse(tp, agflbp);
	/* The AGFL is a circular array: wrap the head index at the end. */
	if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
		agf->agf_flfirst = 0;

	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
	/* A corrupt AGFL must have been reset before blocks are handed out. */
	ASSERT(!pag->pagf_agflreset);
	be32_add_cpu(&agf->agf_flcount, -1);
	xfs_trans_agflist_delta(tp, -1);
	pag->pagf_flcount--;

	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		/* Account the block as consumed by an AGF btree. */
		be32_add_cpu(&agf->agf_btreeblks, 1);
		pag->pagf_btreeblks++;
		logflags |= XFS_AGF_BTREEBLKS;
	}
	xfs_perag_put(pag);

	xfs_alloc_log_agf(tp, agbp, logflags);
	*bnop = bno;

	return 0;
}
2452 
2453 /*
2454  * Log the given fields from the agf structure.
2455  */
void
xfs_alloc_log_agf(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
{
	int	first;		/* first byte offset */
	int	last;		/* last byte offset */
	/*
	 * Byte offset of each loggable AGF field.  The entries must stay in
	 * the same order as the XFS_AGF_* flag bits so xfs_btree_offsets()
	 * can translate the 'fields' mask into a byte range; the trailing
	 * sizeof() entry bounds the final field.
	 */
	static const short	offsets[] = {
		offsetof(xfs_agf_t, agf_magicnum),
		offsetof(xfs_agf_t, agf_versionnum),
		offsetof(xfs_agf_t, agf_seqno),
		offsetof(xfs_agf_t, agf_length),
		offsetof(xfs_agf_t, agf_roots[0]),
		offsetof(xfs_agf_t, agf_levels[0]),
		offsetof(xfs_agf_t, agf_flfirst),
		offsetof(xfs_agf_t, agf_fllast),
		offsetof(xfs_agf_t, agf_flcount),
		offsetof(xfs_agf_t, agf_freeblks),
		offsetof(xfs_agf_t, agf_longest),
		offsetof(xfs_agf_t, agf_btreeblks),
		offsetof(xfs_agf_t, agf_uuid),
		offsetof(xfs_agf_t, agf_rmap_blocks),
		offsetof(xfs_agf_t, agf_refcount_blocks),
		offsetof(xfs_agf_t, agf_refcount_root),
		offsetof(xfs_agf_t, agf_refcount_level),
		/* needed so that we don't log the whole rest of the structure: */
		offsetof(xfs_agf_t, agf_spare64),
		sizeof(xfs_agf_t)
	};

	trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);

	/* Tag the buffer so log recovery knows how to replay it. */
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);

	/* Convert the field mask into a byte range and log it. */
	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}
2494 
2495 /*
2496  * Interface for inode allocation to force the pag data to be initialized.
2497  */
2498 int                                     /* error */
2499 xfs_alloc_pagf_init(
2500         xfs_mount_t             *mp,    /* file system mount structure */
2501         xfs_trans_t             *tp,    /* transaction pointer */
2502         xfs_agnumber_t          agno,   /* allocation group number */
2503         int                     flags)  /* XFS_ALLOC_FLAGS_... */
2504 {
2505         xfs_buf_t               *bp;
2506         int                     error;
2507 
2508         if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2509                 return error;
2510         if (bp)
2511                 xfs_trans_brelse(tp, bp);
2512         return 0;
2513 }
2514 
2515 /*
2516  * Put the block on the freelist for the allocation group.
2517  */
2518 int                                     /* error */
2519 xfs_alloc_put_freelist(
2520         xfs_trans_t             *tp,    /* transaction pointer */
2521         xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
2522         xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
2523         xfs_agblock_t           bno,    /* block being freed */
2524         int                     btreeblk) /* block came from a AGF btree */
2525 {
2526         xfs_agf_t               *agf;   /* a.g. freespace structure */
2527         __be32                  *blockp;/* pointer to array entry */
2528         int                     error;
2529         int                     logflags;
2530         xfs_mount_t             *mp;    /* mount structure */
2531         xfs_perag_t             *pag;   /* per allocation group data */
2532         __be32                  *agfl_bno;
2533         int                     startoff;
2534 
2535         agf = XFS_BUF_TO_AGF(agbp);
2536         mp = tp->t_mountp;
2537 
2538         if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2539                         be32_to_cpu(agf->agf_seqno), &agflbp)))
2540                 return error;
2541         be32_add_cpu(&agf->agf_fllast, 1);
2542         if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
2543                 agf->agf_fllast = 0;
2544 
2545         pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2546         ASSERT(!pag->pagf_agflreset);
2547         be32_add_cpu(&agf->agf_flcount, 1);
2548         xfs_trans_agflist_delta(tp, 1);
2549         pag->pagf_flcount++;
2550 
2551         logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2552         if (btreeblk) {
2553                 be32_add_cpu(&agf->agf_btreeblks, -1);
2554                 pag->pagf_btreeblks--;
2555                 logflags |= XFS_AGF_BTREEBLKS;
2556         }
2557         xfs_perag_put(pag);
2558 
2559         xfs_alloc_log_agf(tp, agbp, logflags);
2560 
2561         ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
2562 
2563         agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2564         blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
2565         *blockp = cpu_to_be32(bno);
2566         startoff = (char *)blockp - (char *)agflbp->b_addr;
2567 
2568         xfs_alloc_log_agf(tp, agbp, logflags);
2569 
2570         xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
2571         xfs_trans_log_buf(tp, agflbp, startoff,
2572                           startoff + sizeof(xfs_agblock_t) - 1);
2573         return 0;
2574 }
2575 
static xfs_failaddr_t
xfs_agf_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(bp);

	/* v5 filesystems: the metadata UUID and LSN must be sane. */
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (!xfs_log_check_lsn(mp,
				be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
			return __this_address;
	}

	if (!xfs_verify_magic(bp, agf->agf_magicnum))
		return __this_address;

	/* Version, free block count and the AGFL indices must be in range. */
	if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
	      be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
	      be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
	      be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
	      be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
		return __this_address;

	/* bno/cnt btree heights must lie within the supported range. */
	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
		return __this_address;

	/* Likewise for the rmap btree, if the feature is enabled. */
	if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
	    (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
	     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
		return __this_address;

	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
		return __this_address;

	/* btreeblks is only maintained with lazy superblock counters. */
	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
		return __this_address;

	/* Refcount btree height check, if reflink is enabled. */
	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
	    (be32_to_cpu(agf->agf_refcount_level) < 1 ||
	     be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
		return __this_address;

	return NULL;

}
2633 
2634 static void
2635 xfs_agf_read_verify(
2636         struct xfs_buf  *bp)
2637 {
2638         struct xfs_mount *mp = bp->b_mount;
2639         xfs_failaddr_t  fa;
2640 
2641         if (xfs_sb_version_hascrc(&mp->m_sb) &&
2642             !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2643                 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2644         else {
2645                 fa = xfs_agf_verify(bp);
2646                 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
2647                         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2648         }
2649 }
2650 
static void
xfs_agf_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_buf_log_item *bip = bp->b_log_item;
	xfs_failaddr_t		fa;

	/* Never write out a structurally corrupt AGF. */
	fa = xfs_agf_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	/* v4 filesystems carry no LSN or CRC in the AGF. */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	/* Stamp the last-modification LSN before checksumming. */
	if (bip)
		XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
}
2673 
/* Verifier operations attached to every AGF buffer. */
const struct xfs_buf_ops xfs_agf_buf_ops = {
	.name = "xfs_agf",
	.magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
	.verify_read = xfs_agf_read_verify,
	.verify_write = xfs_agf_write_verify,
	.verify_struct = xfs_agf_verify,
};
2681 
2682 /*
2683  * Read in the allocation group header (free/alloc section).
2684  */
int					/* error */
xfs_read_agf(
	struct xfs_mount	*mp,	/* mount point structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags,	/* XFS_BUF_ */
	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
{
	int		error;

	trace_xfs_read_agf(mp, agno);

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
	if (error)
		return error;
	/* A NULL buffer without an error indicates a trylock miss. */
	if (!*bpp)
		return 0;

	ASSERT(!(*bpp)->b_error);
	/* AGF buffers are hot; raise the LRU reference count accordingly. */
	xfs_buf_set_ref(*bpp, XFS_AGF_REF);
	return 0;
}
2711 
2712 /*
2713  * Read in the allocation group header (free/alloc section).
2714  */
int					/* error */
xfs_alloc_read_agf(
	struct xfs_mount	*mp,	/* mount point structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags,	/* XFS_ALLOC_FLAG_... */
	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
{
	struct xfs_agf		*agf;		/* ag freelist header */
	struct xfs_perag	*pag;		/* per allocation group data */
	int			error;

	trace_xfs_alloc_read_agf(mp, agno);

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_read_agf(mp, tp, agno,
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
			bpp);
	if (error)
		return error;
	/* NULL buffer without error: trylock missed; caller must cope. */
	if (!*bpp)
		return 0;
	ASSERT(!(*bpp)->b_error);

	agf = XFS_BUF_TO_AGF(*bpp);
	pag = xfs_perag_get(mp, agno);
	/* First read of this AGF: seed the in-core perag from the disk copy. */
	if (!pag->pagf_init) {
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		pag->pagf_levels[XFS_BTNUM_RMAPi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
		pag->pagf_init = 1;
		/* Detect the v5 AGFL padding mismatch; reset happens later. */
		pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
	}
#ifdef DEBUG
	/* On debug kernels, cross-check the cached perag against disk. */
	else if (!XFS_FORCED_SHUTDOWN(mp)) {
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	xfs_perag_put(pag);
	return 0;
}
2771 
2772 /*
2773  * Allocate an extent (variable-size).
2774  * Depending on the allocation type, we either look in a single allocation
2775  * group or loop over the allocation groups to find the result.
2776  */
2777 int                             /* error */
2778 xfs_alloc_vextent(
2779         struct xfs_alloc_arg    *args)  /* allocation argument structure */
2780 {
2781         xfs_agblock_t           agsize; /* allocation group size */
2782         int                     error;
2783         int                     flags;  /* XFS_ALLOC_FLAG_... locking flags */
2784         struct xfs_mount        *mp;    /* mount structure pointer */
2785         xfs_agnumber_t          sagno;  /* starting allocation group number */
2786         xfs_alloctype_t         type;   /* input allocation type */
2787         int                     bump_rotor = 0;
2788         xfs_agnumber_t          rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2789 
2790         mp = args->mp;
2791         type = args->otype = args->type;
2792         args->agbno = NULLAGBLOCK;
2793         /*
2794          * Just fix this up, for the case where the last a.g. is shorter
2795          * (or there's only one a.g.) and the caller couldn't easily figure
2796          * that out (xfs_bmap_alloc).
2797          */
2798         agsize = mp->m_sb.sb_agblocks;
2799         if (args->maxlen > agsize)
2800                 args->maxlen = agsize;
2801         if (args->alignment == 0)
2802                 args->alignment = 1;
2803         ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2804         ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2805         ASSERT(args->minlen <= args->maxlen);
2806         ASSERT(args->minlen <= agsize);
2807         ASSERT(args->mod < args->prod);
2808         if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2809             XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2810             args->minlen > args->maxlen || args->minlen > agsize ||
2811             args->mod >= args->prod) {
2812                 args->fsbno = NULLFSBLOCK;
2813                 trace_xfs_alloc_vextent_badargs(args);
2814                 return 0;
2815         }
2816 
2817         switch (type) {
2818         case XFS_ALLOCTYPE_THIS_AG:
2819         case XFS_ALLOCTYPE_NEAR_BNO:
2820         case XFS_ALLOCTYPE_THIS_BNO:
2821                 /*
2822                  * These three force us into a single a.g.
2823                  */
2824                 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2825                 args->pag = xfs_perag_get(mp, args->agno);
2826                 error = xfs_alloc_fix_freelist(args, 0);
2827                 if (error) {
2828                         trace_xfs_alloc_vextent_nofix(args);
2829                         goto error0;
2830                 }
2831                 if (!args->agbp) {
2832                         trace_xfs_alloc_vextent_noagbp(args);
2833                         break;
2834                 }
2835                 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2836                 if ((error = xfs_alloc_ag_vextent(args)))
2837                         goto error0;
2838                 break;
2839         case XFS_ALLOCTYPE_START_BNO:
2840                 /*
2841                  * Try near allocation first, then anywhere-in-ag after
2842                  * the first a.g. fails.
2843                  */
2844                 if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
2845                     (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2846                         args->fsbno = XFS_AGB_TO_FSB(mp,
2847                                         ((mp->m_agfrotor / rotorstep) %
2848                                         mp->m_sb.sb_agcount), 0);
2849                         bump_rotor = 1;
2850                 }
2851                 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2852                 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2853                 /* FALLTHROUGH */
2854         case XFS_ALLOCTYPE_FIRST_AG:
2855                 /*
2856                  * Rotate through the allocation groups looking for a winner.
2857                  */
2858                 if (type == XFS_ALLOCTYPE_FIRST_AG) {
2859                         /*
2860                          * Start with allocation group given by bno.
2861                          */
2862                         args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2863                         args->type = XFS_ALLOCTYPE_THIS_AG;
2864                         sagno = 0;
2865                         flags = 0;
2866                 } else {
2867                         /*
2868                          * Start with the given allocation group.
2869                          */
2870                         args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2871                         flags = XFS_ALLOC_FLAG_TRYLOCK;
2872                 }
2873                 /*
2874                  * Loop over allocation groups twice; first time with
2875                  * trylock set, second time without.
2876                  */
2877                 for (;;) {
2878                         args->pag = xfs_perag_get(mp, args->agno);
2879                         error = xfs_alloc_fix_freelist(args, flags);
2880                         if (error) {
2881                                 trace_xfs_alloc_vextent_nofix(args);
2882                                 goto error0;
2883                         }
2884                         /*
2885                          * If we get a buffer back then the allocation will fly.
2886                          */
2887                         if (args->agbp) {
2888                                 if ((error = xfs_alloc_ag_vextent(args)))
2889                                         goto error0;
2890                                 break;
2891                         }
2892 
2893                         trace_xfs_alloc_vextent_loopfailed(args);
2894 
2895                         /*
2896                          * Didn't work, figure out the next iteration.
2897                          */
2898                         if (args->agno == sagno &&
2899                             type == XFS_ALLOCTYPE_START_BNO)
2900                                 args->type = XFS_ALLOCTYPE_THIS_AG;
2901                         /*
2902                         * For the first allocation, we can try any AG to get
2903                         * space.  However, if we already have allocated a
2904                         * block, we don't want to try AGs whose number is below
2905                         * sagno. Otherwise, we may end up with out-of-order
2906                         * locking of AGF, which might cause deadlock.
2907                         */
2908                         if (++(args->agno) == mp->m_sb.sb_agcount) {
2909                                 if (args->tp->t_firstblock != NULLFSBLOCK)
2910                                         args->agno = sagno;
2911                                 else
2912                                         args->agno = 0;
2913                         }
2914                         /*
2915                          * Reached the starting a.g., must either be done
2916                          * or switch to non-trylock mode.
2917                          */
2918                         if (args->agno == sagno) {
2919                                 if (flags == 0) {
2920                                         args->agbno = NULLAGBLOCK;
2921                                         trace_xfs_alloc_vextent_allfailed(args);
2922                                         break;
2923                                 }
2924 
2925                                 flags = 0;
2926                                 if (type == XFS_ALLOCTYPE_START_BNO) {
2927                                         args->agbno = XFS_FSB_TO_AGBNO(mp,
2928                                                 args->fsbno);
2929                                         args->type = XFS_ALLOCTYPE_NEAR_BNO;
2930                                 }
2931                         }
2932                         xfs_perag_put(args->pag);
2933                 }
2934                 if (bump_rotor) {
2935                         if (args->agno == sagno)
2936                                 mp->m_agfrotor = (mp->m_agfrotor + 1) %
2937                                         (mp->m_sb.sb_agcount * rotorstep);
2938                         else
2939                                 mp->m_agfrotor = (args->agno * rotorstep + 1) %
2940                                         (mp->m_sb.sb_agcount * rotorstep);
2941                 }
2942                 break;
2943         default:
2944                 ASSERT(0);
2945                 /* NOTREACHED */
2946         }
2947         if (args->agbno == NULLAGBLOCK)
2948                 args->fsbno = NULLFSBLOCK;
2949         else {
2950                 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2951 #ifdef DEBUG
2952                 ASSERT(args->len >= args->minlen);
2953                 ASSERT(args->len <= args->maxlen);
2954                 ASSERT(args->agbno % args->alignment == 0);
2955                 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2956                         args->len);
2957 #endif
2958 
2959                 /* Zero the extent if we were asked to do so */
2960                 if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
2961                         error = xfs_zero_extent(args->ip, args->fsbno, args->len);
2962                         if (error)
2963                                 goto error0;
2964                 }
2965 
2966         }
2967         xfs_perag_put(args->pag);
2968         return 0;
2969 error0:
2970         xfs_perag_put(args->pag);
2971         return error;
2972 }
2973 
2974 /* Ensure that the freelist is at full capacity. */
2975 int
2976 xfs_free_extent_fix_freelist(
2977         struct xfs_trans        *tp,
2978         xfs_agnumber_t          agno,
2979         struct xfs_buf          **agbp)
2980 {
2981         struct xfs_alloc_arg    args;
2982         int                     error;
2983 
2984         memset(&args, 0, sizeof(struct xfs_alloc_arg));
2985         args.tp = tp;
2986         args.mp = tp->t_mountp;
2987         args.agno = agno;
2988 
2989         /*
2990          * validate that the block number is legal - the enables us to detect
2991          * and handle a silent filesystem corruption rather than crashing.
2992          */
2993         if (args.agno >= args.mp->m_sb.sb_agcount)
2994                 return -EFSCORRUPTED;
2995 
2996         args.pag = xfs_perag_get(args.mp, args.agno);
2997         ASSERT(args.pag);
2998 
2999         error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3000         if (error)
3001                 goto out;
3002 
3003         *agbp = args.agbp;
3004 out:
3005         xfs_perag_put(args.pag);
3006         return error;
3007 }
3008 
3009 /*
3010  * Free an extent.
3011  * Just break up the extent address and hand off to xfs_free_ag_extent
3012  * after fixing up the freelist.
3013  */
3014 int
3015 __xfs_free_extent(
3016         struct xfs_trans                *tp,
3017         xfs_fsblock_t                   bno,
3018         xfs_extlen_t                    len,
3019         const struct xfs_owner_info     *oinfo,
3020         enum xfs_ag_resv_type           type,
3021         bool                            skip_discard)
3022 {
3023         struct xfs_mount                *mp = tp->t_mountp;
3024         struct xfs_buf                  *agbp;
3025         xfs_agnumber_t                  agno = XFS_FSB_TO_AGNO(mp, bno);
3026         xfs_agblock_t                   agbno = XFS_FSB_TO_AGBNO(mp, bno);
3027         int                             error;
3028         unsigned int                    busy_flags = 0;
3029 
3030         ASSERT(len != 0);
3031         ASSERT(type != XFS_AG_RESV_AGFL);
3032 
3033         if (XFS_TEST_ERROR(false, mp,
3034                         XFS_ERRTAG_FREE_EXTENT))
3035                 return -EIO;
3036 
3037         error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
3038         if (error)
3039                 return error;
3040 
3041         XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);
3042 
3043         /* validate the extent size is legal now we have the agf locked */
3044         XFS_WANT_CORRUPTED_GOTO(mp,
3045                 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
3046                                 err);
3047 
3048         error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
3049         if (error)
3050                 goto err;
3051 
3052         if (skip_discard)
3053                 busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
3054         xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags);
3055         return 0;
3056 
3057 err:
3058         xfs_trans_brelse(tp, agbp);
3059         return error;
3060 }
3061 
/* Context passed from the range-query entry points to the btree helper. */
struct xfs_alloc_query_range_info {
        xfs_alloc_query_range_fn        fn;     /* per-record callback */
        void                            *priv;  /* caller's opaque data */
};
3066 
3067 /* Format btree record and pass to our callback. */
3068 STATIC int
3069 xfs_alloc_query_range_helper(
3070         struct xfs_btree_cur            *cur,
3071         union xfs_btree_rec             *rec,
3072         void                            *priv)
3073 {
3074         struct xfs_alloc_query_range_info       *query = priv;
3075         struct xfs_alloc_rec_incore             irec;
3076 
3077         irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
3078         irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
3079         return query->fn(cur, &irec, query->priv);
3080 }
3081 
3082 /* Find all free space within a given range of blocks. */
3083 int
3084 xfs_alloc_query_range(
3085         struct xfs_btree_cur                    *cur,
3086         struct xfs_alloc_rec_incore             *low_rec,
3087         struct xfs_alloc_rec_incore             *high_rec,
3088         xfs_alloc_query_range_fn                fn,
3089         void                                    *priv)
3090 {
3091         union xfs_btree_irec                    low_brec;
3092         union xfs_btree_irec                    high_brec;
3093         struct xfs_alloc_query_range_info       query;
3094 
3095         ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3096         low_brec.a = *low_rec;
3097         high_brec.a = *high_rec;
3098         query.priv = priv;
3099         query.fn = fn;
3100         return xfs_btree_query_range(cur, &low_brec, &high_brec,
3101                         xfs_alloc_query_range_helper, &query);
3102 }
3103 
3104 /* Find all free space records. */
3105 int
3106 xfs_alloc_query_all(
3107         struct xfs_btree_cur                    *cur,
3108         xfs_alloc_query_range_fn                fn,
3109         void                                    *priv)
3110 {
3111         struct xfs_alloc_query_range_info       query;
3112 
3113         ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3114         query.priv = priv;
3115         query.fn = fn;
3116         return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3117 }
3118 
3119 /* Is there a record covering a given extent? */
3120 int
3121 xfs_alloc_has_record(
3122         struct xfs_btree_cur    *cur,
3123         xfs_agblock_t           bno,
3124         xfs_extlen_t            len,
3125         bool                    *exists)
3126 {
3127         union xfs_btree_irec    low;
3128         union xfs_btree_irec    high;
3129 
3130         memset(&low, 0, sizeof(low));
3131         low.a.ar_startblock = bno;
3132         memset(&high, 0xFF, sizeof(high));
3133         high.a.ar_startblock = bno + len - 1;
3134 
3135         return xfs_btree_has_record(cur, &low, &high, exists);
3136 }
3137 
3138 /*
3139  * Walk all the blocks in the AGFL.  The @walk_fn can return any negative
3140  * error code or XFS_ITER_*.
3141  */
3142 int
3143 xfs_agfl_walk(
3144         struct xfs_mount        *mp,
3145         struct xfs_agf          *agf,
3146         struct xfs_buf          *agflbp,
3147         xfs_agfl_walk_fn        walk_fn,
3148         void                    *priv)
3149 {
3150         __be32                  *agfl_bno;
3151         unsigned int            i;
3152         int                     error;
3153 
3154         agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
3155         i = be32_to_cpu(agf->agf_flfirst);
3156 
3157         /* Nothing to walk in an empty AGFL. */
3158         if (agf->agf_flcount == cpu_to_be32(0))
3159                 return 0;
3160 
3161         /* Otherwise, walk from first to last, wrapping as needed. */
3162         for (;;) {
3163                 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
3164                 if (error)
3165                         return error;
3166                 if (i == be32_to_cpu(agf->agf_fllast))
3167                         break;
3168                 if (++i == xfs_agfl_size(mp))
3169                         i = 0;
3170         }
3171 
3172         return 0;
3173 }

/* [<][>][^][v][top][bottom][index][help] */