root/fs/jfs/jfs_xtree.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. xtLookup
  2. xtSearch
  3. xtInsert
  4. xtSplitUp
  5. xtSplitPage
  6. xtSplitRoot
  7. xtExtend
  8. xtTailgate
  9. xtUpdate
  10. xtAppend
  11. xtDelete
  12. xtDeleteUp
  13. xtRelocate
  14. xtSearchNode
  15. xtRelink
  16. xtInitRoot
  17. xtTruncate
  18. xtTruncate_pmap
  19. jfs_xtstat_proc_show

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  *   Copyright (C) International Business Machines Corp., 2000-2005
   4  */
   5 /*
   6  *      jfs_xtree.c: extent allocation descriptor B+-tree manager
   7  */
   8 
   9 #include <linux/fs.h>
  10 #include <linux/module.h>
  11 #include <linux/quotaops.h>
  12 #include <linux/seq_file.h>
  13 #include "jfs_incore.h"
  14 #include "jfs_filsys.h"
  15 #include "jfs_metapage.h"
  16 #include "jfs_dmap.h"
  17 #include "jfs_dinode.h"
  18 #include "jfs_superblock.h"
  19 #include "jfs_debug.h"
  20 
  21 /*
  22  * xtree local flag
  23  */
  24 #define XT_INSERT       0x00000001
  25 
  26 /*
  27  *      xtree key/entry comparison: extent offset
  28  *
  29  * return:
  30  *      -1: k < start of extent
  31  *       0: start_of_extent <= k <= end_of_extent
  32  *       1: k > end_of_extent
  33  */
  34 #define XT_CMP(CMP, K, X, OFFSET64)\
  35 {\
  36         OFFSET64 = offsetXAD(X);\
  37         (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
  38                 ((K) < OFFSET64) ? -1 : 0;\
  39 }
  40 
  41 /* write a xad entry */
  42 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
  43 {\
  44         (XAD)->flag = (FLAG);\
  45         XADoffset((XAD), (OFF));\
  46         XADlength((XAD), (LEN));\
  47         XADaddress((XAD), (ADDR));\
  48 }
  49 
  50 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
  51 
  52 /* get page buffer for specified block address */
  53 /* ToDo: Replace this ugly macro with a function */
  54 #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)                             \
  55 do {                                                                    \
  56         BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot);        \
  57         if (!(RC)) {                                                    \
  58                 if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \
  59                     (le16_to_cpu((P)->header.nextindex) >               \
  60                      le16_to_cpu((P)->header.maxentry)) ||              \
  61                     (le16_to_cpu((P)->header.maxentry) >                \
  62                      (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \
  63                         jfs_error((IP)->i_sb,                           \
  64                                   "XT_GETPAGE: xtree page corrupt\n");  \
  65                         BT_PUTPAGE(MP);                                 \
  66                         MP = NULL;                                      \
  67                         RC = -EIO;                                      \
  68                 }                                                       \
  69         }                                                               \
  70 } while (0)
  71 
  72 /* for consistency */
  73 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
  74 
  75 #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
  76         BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
  77 /* xtree entry parameter descriptor */
  78 struct xtsplit {
  79         struct metapage *mp;
  80         s16 index;
  81         u8 flag;
  82         s64 off;
  83         s64 addr;
  84         int len;
  85         struct pxdlist *pxdlist;
  86 };
  87 
  88 
  89 /*
  90  *      statistics
  91  */
  92 #ifdef CONFIG_JFS_STATISTICS
  93 static struct {
  94         uint search;
  95         uint fastSearch;
  96         uint split;
  97 } xtStat;
  98 #endif
  99 
 100 
 101 /*
 102  * forward references
 103  */
 104 static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
 105                     struct btstack * btstack, int flag);
 106 
 107 static int xtSplitUp(tid_t tid,
 108                      struct inode *ip,
 109                      struct xtsplit * split, struct btstack * btstack);
 110 
 111 static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
 112                        struct metapage ** rmpp, s64 * rbnp);
 113 
 114 static int xtSplitRoot(tid_t tid, struct inode *ip,
 115                        struct xtsplit * split, struct metapage ** rmpp);
 116 
 117 #ifdef _STILL_TO_PORT
 118 static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
 119                       xtpage_t * fp, struct btstack * btstack);
 120 
 121 static int xtSearchNode(struct inode *ip,
 122                         xad_t * xad,
 123                         int *cmpp, struct btstack * btstack, int flag);
 124 
 125 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
 126 #endif                          /*  _STILL_TO_PORT */
 127 
 128 /*
 129  *      xtLookup()
 130  *
 131  * function: map a single page into a physical extent;
 132  */
 133 int xtLookup(struct inode *ip, s64 lstart,
 134              s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
 135 {
 136         int rc = 0;
 137         struct btstack btstack;
 138         int cmp;
 139         s64 bn;
 140         struct metapage *mp;
 141         xtpage_t *p;
 142         int index;
 143         xad_t *xad;
 144         s64 next, size, xoff, xend;
 145         int xlen;
 146         s64 xaddr;
 147 
 148         *paddr = 0;
 149         *plen = llen;
 150 
 151         if (!no_check) {
 152                 /* is lookup offset beyond eof ? */
 153                 size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
 154                     JFS_SBI(ip->i_sb)->l2bsize;
 155                 if (lstart >= size)
 156                         return 0;
 157         }
 158 
 159         /*
 160          * search for the xad entry covering the logical extent
 161          */
 162 //search:
 163         if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
 164                 jfs_err("xtLookup: xtSearch returned %d", rc);
 165                 return rc;
 166         }
 167 
 168         /*
 169          *      compute the physical extent covering logical extent
 170          *
 171          * N.B. search may have failed (e.g., hole in sparse file),
 172          * and returned the index of the next entry.
 173          */
 174         /* retrieve search result */
 175         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 176 
 177         /* is xad found covering start of logical extent ?
 178          * lstart is a page start address,
 179          * i.e., lstart cannot start in a hole;
 180          */
 181         if (cmp) {
 182                 if (next)
 183                         *plen = min(next - lstart, llen);
 184                 goto out;
 185         }
 186 
 187         /*
 188          * lxd covered by xad
 189          */
 190         xad = &p->xad[index];
 191         xoff = offsetXAD(xad);
 192         xlen = lengthXAD(xad);
 193         xend = xoff + xlen;
 194         xaddr = addressXAD(xad);
 195 
 196         /* initialize new pxd */
 197         *pflag = xad->flag;
 198         *paddr = xaddr + (lstart - xoff);
 199         /* a page must be fully covered by an xad */
 200         *plen = min(xend - lstart, llen);
 201 
 202       out:
 203         XT_PUTPAGE(mp);
 204 
 205         return rc;
 206 }
 207 
 208 /*
 209  *      xtSearch()
 210  *
 211  * function:    search for the xad entry covering specified offset.
 212  *
 213  * parameters:
 214  *      ip      - file object;
 215  *      xoff    - extent offset;
 216  *      nextp   - address of next extent (if any) for search miss
 217  *      cmpp    - comparison result:
 218  *      btstack - traverse stack;
 219  *      flag    - search process flag (XT_INSERT);
 220  *
 221  * returns:
 222  *      btstack contains (bn, index) of search path traversed to the entry.
 223  *      *cmpp is set to result of comparison with the entry returned.
 224  *      the page containing the entry is pinned at exit.
 225  */
 226 static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
 227                     int *cmpp, struct btstack * btstack, int flag)
 228 {
 229         struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 230         int rc = 0;
 231         int cmp = 1;            /* init for empty page */
 232         s64 bn;                 /* block number */
 233         struct metapage *mp;    /* page buffer */
 234         xtpage_t *p;            /* page */
 235         xad_t *xad;
 236         int base, index, lim, btindex;
 237         struct btframe *btsp;
 238         int nsplit = 0;         /* number of pages to split */
 239         s64 t64;
 240         s64 next = 0;
 241 
 242         INCREMENT(xtStat.search);
 243 
 244         BT_CLR(btstack);
 245 
 246         btstack->nsplit = 0;
 247 
 248         /*
 249          *      search down tree from root:
 250          *
 251          * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
 252          * internal page, child page Pi contains entry with k, Ki <= K < Kj.
 253          *
 254          * if entry with search key K is not found
 255          * internal page search find the entry with largest key Ki
 256          * less than K which point to the child page to search;
 257          * leaf page search find the entry with smallest key Kj
 258          * greater than K so that the returned index is the position of
 259          * the entry to be shifted right for insertion of new entry.
 260          * for empty tree, search key is greater than any key of the tree.
 261          *
 262          * by convention, root bn = 0.
 263          */
 264         for (bn = 0;;) {
 265                 /* get/pin the page to search */
 266                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 267                 if (rc)
 268                         return rc;
 269 
 270                 /* try sequential access heuristics with the previous
 271                  * access entry in target leaf page:
 272                  * once search narrowed down into the target leaf,
 273                  * key must either match an entry in the leaf or
 274                  * key entry does not exist in the tree;
 275                  */
 276 //fastSearch:
 277                 if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
 278                     (p->header.flag & BT_LEAF) &&
 279                     (index = jfs_ip->btindex) <
 280                     le16_to_cpu(p->header.nextindex)) {
 281                         xad = &p->xad[index];
 282                         t64 = offsetXAD(xad);
 283                         if (xoff < t64 + lengthXAD(xad)) {
 284                                 if (xoff >= t64) {
 285                                         *cmpp = 0;
 286                                         goto out;
 287                                 }
 288 
 289                                 /* stop sequential access heuristics */
 290                                 goto binarySearch;
 291                         } else {        /* (t64 + lengthXAD(xad)) <= xoff */
 292 
 293                                 /* try next sequential entry */
 294                                 index++;
 295                                 if (index <
 296                                     le16_to_cpu(p->header.nextindex)) {
 297                                         xad++;
 298                                         t64 = offsetXAD(xad);
 299                                         if (xoff < t64 + lengthXAD(xad)) {
 300                                                 if (xoff >= t64) {
 301                                                         *cmpp = 0;
 302                                                         goto out;
 303                                                 }
 304 
 305                                                 /* miss: key falls between
 306                                                  * previous and this entry
 307                                                  */
 308                                                 *cmpp = 1;
 309                                                 next = t64;
 310                                                 goto out;
 311                                         }
 312 
 313                                         /* (xoff >= t64 + lengthXAD(xad));
 314                                          * matching entry may be further out:
 315                                          * stop heuristic search
 316                                          */
 317                                         /* stop sequential access heuristics */
 318                                         goto binarySearch;
 319                                 }
 320 
 321                                 /* (index == p->header.nextindex);
 322                                  * miss: key entry does not exist in
 323                                  * the target leaf/tree
 324                                  */
 325                                 *cmpp = 1;
 326                                 goto out;
 327                         }
 328 
 329                         /*
 330                          * if hit, return index of the entry found, and
 331                          * if miss, where new entry with search key is
 332                          * to be inserted;
 333                          */
 334                       out:
 335                         /* compute number of pages to split */
 336                         if (flag & XT_INSERT) {
 337                                 if (p->header.nextindex ==      /* little-endian */
 338                                     p->header.maxentry)
 339                                         nsplit++;
 340                                 else
 341                                         nsplit = 0;
 342                                 btstack->nsplit = nsplit;
 343                         }
 344 
 345                         /* save search result */
 346                         btsp = btstack->top;
 347                         btsp->bn = bn;
 348                         btsp->index = index;
 349                         btsp->mp = mp;
 350 
 351                         /* update sequential access heuristics */
 352                         jfs_ip->btindex = index;
 353 
 354                         if (nextp)
 355                                 *nextp = next;
 356 
 357                         INCREMENT(xtStat.fastSearch);
 358                         return 0;
 359                 }
 360 
 361                 /* well, ... full search now */
 362               binarySearch:
 363                 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
 364 
 365                 /*
 366                  * binary search with search key K on the current page
 367                  */
 368                 for (base = XTENTRYSTART; lim; lim >>= 1) {
 369                         index = base + (lim >> 1);
 370 
 371                         XT_CMP(cmp, xoff, &p->xad[index], t64);
 372                         if (cmp == 0) {
 373                                 /*
 374                                  *      search hit
 375                                  */
 376                                 /* search hit - leaf page:
 377                                  * return the entry found
 378                                  */
 379                                 if (p->header.flag & BT_LEAF) {
 380                                         *cmpp = cmp;
 381 
 382                                         /* compute number of pages to split */
 383                                         if (flag & XT_INSERT) {
 384                                                 if (p->header.nextindex ==
 385                                                     p->header.maxentry)
 386                                                         nsplit++;
 387                                                 else
 388                                                         nsplit = 0;
 389                                                 btstack->nsplit = nsplit;
 390                                         }
 391 
 392                                         /* save search result */
 393                                         btsp = btstack->top;
 394                                         btsp->bn = bn;
 395                                         btsp->index = index;
 396                                         btsp->mp = mp;
 397 
 398                                         /* init sequential access heuristics */
 399                                         btindex = jfs_ip->btindex;
 400                                         if (index == btindex ||
 401                                             index == btindex + 1)
 402                                                 jfs_ip->btorder = BT_SEQUENTIAL;
 403                                         else
 404                                                 jfs_ip->btorder = BT_RANDOM;
 405                                         jfs_ip->btindex = index;
 406 
 407                                         return 0;
 408                                 }
 409                                 /* search hit - internal page:
 410                                  * descend/search its child page
 411                                  */
 412                                 if (index < le16_to_cpu(p->header.nextindex)-1)
 413                                         next = offsetXAD(&p->xad[index + 1]);
 414                                 goto next;
 415                         }
 416 
 417                         if (cmp > 0) {
 418                                 base = index + 1;
 419                                 --lim;
 420                         }
 421                 }
 422 
 423                 /*
 424                  *      search miss
 425                  *
 426                  * base is the smallest index with key (Kj) greater than
 427                  * search key (K) and may be zero or maxentry index.
 428                  */
 429                 if (base < le16_to_cpu(p->header.nextindex))
 430                         next = offsetXAD(&p->xad[base]);
 431                 /*
 432                  * search miss - leaf page:
 433                  *
 434                  * return location of entry (base) where new entry with
 435                  * search key K is to be inserted.
 436                  */
 437                 if (p->header.flag & BT_LEAF) {
 438                         *cmpp = cmp;
 439 
 440                         /* compute number of pages to split */
 441                         if (flag & XT_INSERT) {
 442                                 if (p->header.nextindex ==
 443                                     p->header.maxentry)
 444                                         nsplit++;
 445                                 else
 446                                         nsplit = 0;
 447                                 btstack->nsplit = nsplit;
 448                         }
 449 
 450                         /* save search result */
 451                         btsp = btstack->top;
 452                         btsp->bn = bn;
 453                         btsp->index = base;
 454                         btsp->mp = mp;
 455 
 456                         /* init sequential access heuristics */
 457                         btindex = jfs_ip->btindex;
 458                         if (base == btindex || base == btindex + 1)
 459                                 jfs_ip->btorder = BT_SEQUENTIAL;
 460                         else
 461                                 jfs_ip->btorder = BT_RANDOM;
 462                         jfs_ip->btindex = base;
 463 
 464                         if (nextp)
 465                                 *nextp = next;
 466 
 467                         return 0;
 468                 }
 469 
 470                 /*
 471                  * search miss - non-leaf page:
 472                  *
 473                  * if base is non-zero, decrement base by one to get the parent
 474                  * entry of the child page to search.
 475                  */
 476                 index = base ? base - 1 : base;
 477 
 478                 /*
 479                  * go down to child page
 480                  */
 481               next:
 482                 /* update number of pages to split */
 483                 if (p->header.nextindex == p->header.maxentry)
 484                         nsplit++;
 485                 else
 486                         nsplit = 0;
 487 
 488                 /* push (bn, index) of the parent page/entry */
 489                 if (BT_STACK_FULL(btstack)) {
 490                         jfs_error(ip->i_sb, "stack overrun!\n");
 491                         XT_PUTPAGE(mp);
 492                         return -EIO;
 493                 }
 494                 BT_PUSH(btstack, bn, index);
 495 
 496                 /* get the child page block number */
 497                 bn = addressXAD(&p->xad[index]);
 498 
 499                 /* unpin the parent page */
 500                 XT_PUTPAGE(mp);
 501         }
 502 }
 503 
 504 /*
 505  *      xtInsert()
 506  *
 507  * function:
 508  *
 509  * parameter:
 510  *      tid     - transaction id;
 511  *      ip      - file object;
 512  *      xflag   - extent flag (XAD_NOTRECORDED):
 513  *      xoff    - extent offset;
 514  *      xlen    - extent length;
 515  *      xaddrp  - extent address pointer (in/out):
 516  *              if (*xaddrp)
 517  *                      caller allocated data extent at *xaddrp;
 518  *              else
 519  *                      allocate data extent and return its xaddr;
 520  *      flag    -
 521  *
 522  * return:
 523  */
 524 int xtInsert(tid_t tid,         /* transaction id */
 525              struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
 526              int flag)
 527 {
 528         int rc = 0;
 529         s64 xaddr, hint;
 530         struct metapage *mp;    /* meta-page buffer */
 531         xtpage_t *p;            /* base B+-tree index page */
 532         s64 bn;
 533         int index, nextindex;
 534         struct btstack btstack; /* traverse stack */
 535         struct xtsplit split;   /* split information */
 536         xad_t *xad;
 537         int cmp;
 538         s64 next;
 539         struct tlock *tlck;
 540         struct xtlock *xtlck;
 541 
 542         jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
 543 
 544         /*
 545          *      search for the entry location at which to insert:
 546          *
 547          * xtFastSearch() and xtSearch() both returns (leaf page
 548          * pinned, index at which to insert).
 549          * n.b. xtSearch() may return index of maxentry of
 550          * the full page.
 551          */
 552         if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
 553                 return rc;
 554 
 555         /* retrieve search result */
 556         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 557 
 558         /* This test must follow XT_GETSEARCH since mp must be valid if
 559          * we branch to out: */
 560         if ((cmp == 0) || (next && (xlen > next - xoff))) {
 561                 rc = -EEXIST;
 562                 goto out;
 563         }
 564 
 565         /*
 566          * allocate data extent requested
 567          *
 568          * allocation hint: last xad
 569          */
 570         if ((xaddr = *xaddrp) == 0) {
 571                 if (index > XTENTRYSTART) {
 572                         xad = &p->xad[index - 1];
 573                         hint = addressXAD(xad) + lengthXAD(xad) - 1;
 574                 } else
 575                         hint = 0;
 576                 if ((rc = dquot_alloc_block(ip, xlen)))
 577                         goto out;
 578                 if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
 579                         dquot_free_block(ip, xlen);
 580                         goto out;
 581                 }
 582         }
 583 
 584         /*
 585          *      insert entry for new extent
 586          */
 587         xflag |= XAD_NEW;
 588 
 589         /*
 590          *      if the leaf page is full, split the page and
 591          *      propagate up the router entry for the new page from split
 592          *
 593          * The xtSplitUp() will insert the entry and unpin the leaf page.
 594          */
 595         nextindex = le16_to_cpu(p->header.nextindex);
 596         if (nextindex == le16_to_cpu(p->header.maxentry)) {
 597                 split.mp = mp;
 598                 split.index = index;
 599                 split.flag = xflag;
 600                 split.off = xoff;
 601                 split.len = xlen;
 602                 split.addr = xaddr;
 603                 split.pxdlist = NULL;
 604                 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
 605                         /* undo data extent allocation */
 606                         if (*xaddrp == 0) {
 607                                 dbFree(ip, xaddr, (s64) xlen);
 608                                 dquot_free_block(ip, xlen);
 609                         }
 610                         return rc;
 611                 }
 612 
 613                 *xaddrp = xaddr;
 614                 return 0;
 615         }
 616 
 617         /*
 618          *      insert the new entry into the leaf page
 619          */
 620         /*
 621          * acquire a transaction lock on the leaf page;
 622          *
 623          * action: xad insertion/extension;
 624          */
 625         BT_MARK_DIRTY(mp, ip);
 626 
 627         /* if insert into middle, shift right remaining entries. */
 628         if (index < nextindex)
 629                 memmove(&p->xad[index + 1], &p->xad[index],
 630                         (nextindex - index) * sizeof(xad_t));
 631 
 632         /* insert the new entry: mark the entry NEW */
 633         xad = &p->xad[index];
 634         XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
 635 
 636         /* advance next available entry index */
 637         le16_add_cpu(&p->header.nextindex, 1);
 638 
 639         /* Don't log it if there are no links to the file */
 640         if (!test_cflag(COMMIT_Nolink, ip)) {
 641                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 642                 xtlck = (struct xtlock *) & tlck->lock;
 643                 xtlck->lwm.offset =
 644                     (xtlck->lwm.offset) ? min(index,
 645                                               (int)xtlck->lwm.offset) : index;
 646                 xtlck->lwm.length =
 647                     le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
 648         }
 649 
 650         *xaddrp = xaddr;
 651 
 652       out:
 653         /* unpin the leaf page */
 654         XT_PUTPAGE(mp);
 655 
 656         return rc;
 657 }
 658 
 659 
 660 /*
 661  *      xtSplitUp()
 662  *
 663  * function:
 664  *      split full pages as propagating insertion up the tree
 665  *
 666  * parameter:
 667  *      tid     - transaction id;
 668  *      ip      - file object;
 669  *      split   - entry parameter descriptor;
 670  *      btstack - traverse stack from xtSearch()
 671  *
 672  * return:
 673  */
 674 static int
 675 xtSplitUp(tid_t tid,
 676           struct inode *ip, struct xtsplit * split, struct btstack * btstack)
 677 {
 678         int rc = 0;
 679         struct metapage *smp;
 680         xtpage_t *sp;           /* split page */
 681         struct metapage *rmp;
 682         s64 rbn;                /* new right page block number */
 683         struct metapage *rcmp;
 684         xtpage_t *rcp;          /* right child page */
 685         s64 rcbn;               /* right child page block number */
 686         int skip;               /* index of entry of insertion */
 687         int nextindex;          /* next available entry index of p */
 688         struct btframe *parent; /* parent page entry on traverse stack */
 689         xad_t *xad;
 690         s64 xaddr;
 691         int xlen;
 692         int nsplit;             /* number of pages split */
 693         struct pxdlist pxdlist;
 694         pxd_t *pxd;
 695         struct tlock *tlck;
 696         struct xtlock *xtlck;
 697 
 698         smp = split->mp;
 699         sp = XT_PAGE(ip, smp);
 700 
 701         /* is inode xtree root extension/inline EA area free ? */
 702         if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
 703             (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
 704             (JFS_IP(ip)->mode2 & INLINEEA)) {
 705                 sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
 706                 JFS_IP(ip)->mode2 &= ~INLINEEA;
 707 
 708                 BT_MARK_DIRTY(smp, ip);
 709                 /*
 710                  * acquire a transaction lock on the leaf page;
 711                  *
 712                  * action: xad insertion/extension;
 713                  */
 714 
 715                 /* if insert into middle, shift right remaining entries. */
 716                 skip = split->index;
 717                 nextindex = le16_to_cpu(sp->header.nextindex);
 718                 if (skip < nextindex)
 719                         memmove(&sp->xad[skip + 1], &sp->xad[skip],
 720                                 (nextindex - skip) * sizeof(xad_t));
 721 
 722                 /* insert the new entry: mark the entry NEW */
 723                 xad = &sp->xad[skip];
 724                 XT_PUTENTRY(xad, split->flag, split->off, split->len,
 725                             split->addr);
 726 
 727                 /* advance next available entry index */
 728                 le16_add_cpu(&sp->header.nextindex, 1);
 729 
 730                 /* Don't log it if there are no links to the file */
 731                 if (!test_cflag(COMMIT_Nolink, ip)) {
 732                         tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
 733                         xtlck = (struct xtlock *) & tlck->lock;
 734                         xtlck->lwm.offset = (xtlck->lwm.offset) ?
 735                             min(skip, (int)xtlck->lwm.offset) : skip;
 736                         xtlck->lwm.length =
 737                             le16_to_cpu(sp->header.nextindex) -
 738                             xtlck->lwm.offset;
 739                 }
 740 
 741                 return 0;
 742         }
 743 
 744         /*
 745          * allocate new index blocks to cover index page split(s)
 746          *
 747          * allocation hint: ?
 748          */
 749         if (split->pxdlist == NULL) {
 750                 nsplit = btstack->nsplit;
 751                 split->pxdlist = &pxdlist;
 752                 pxdlist.maxnpxd = pxdlist.npxd = 0;
 753                 pxd = &pxdlist.pxd[0];
 754                 xlen = JFS_SBI(ip->i_sb)->nbperpage;
 755                 for (; nsplit > 0; nsplit--, pxd++) {
 756                         if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
 757                             == 0) {
 758                                 PXDaddress(pxd, xaddr);
 759                                 PXDlength(pxd, xlen);
 760 
 761                                 pxdlist.maxnpxd++;
 762 
 763                                 continue;
 764                         }
 765 
 766                         /* undo allocation */
 767 
 768                         XT_PUTPAGE(smp);
 769                         return rc;
 770                 }
 771         }
 772 
 773         /*
 774          * Split leaf page <sp> into <sp> and a new right page <rp>.
 775          *
 776          * The split routines insert the new entry into the leaf page,
 777          * and acquire txLock as appropriate.
 778          * return <rp> pinned and its block number <rpbn>.
 779          */
 780         rc = (sp->header.flag & BT_ROOT) ?
 781             xtSplitRoot(tid, ip, split, &rmp) :
 782             xtSplitPage(tid, ip, split, &rmp, &rbn);
 783 
 784         XT_PUTPAGE(smp);
 785 
 786         if (rc)
 787                 return -EIO;
 788         /*
 789          * propagate up the router entry for the leaf page just split
 790          *
 791          * insert a router entry for the new page into the parent page,
 792          * propagate the insert/split up the tree by walking back the stack
 793          * of (bn of parent page, index of child page entry in parent page)
 794          * that were traversed during the search for the page that split.
 795          *
 796          * the propagation of insert/split up the tree stops if the root
 797          * splits or the page inserted into doesn't have to split to hold
 798          * the new entry.
 799          *
 800          * the parent entry for the split page remains the same, and
 801          * a new entry is inserted at its right with the first key and
 802          * block number of the new right page.
 803          *
 804          * There are a maximum of 3 pages pinned at any time:
 805          * right child, left parent and right parent (when the parent splits)
 806          * to keep the child page pinned while working on the parent.
 807          * make sure that all pins are released at exit.
 808          */
 809         while ((parent = BT_POP(btstack)) != NULL) {
 810                 /* parent page specified by stack frame <parent> */
 811 
 812                 /* keep current child pages <rcp> pinned */
 813                 rcmp = rmp;
 814                 rcbn = rbn;
 815                 rcp = XT_PAGE(ip, rcmp);
 816 
 817                 /*
 818                  * insert router entry in parent for new right child page <rp>
 819                  */
 820                 /* get/pin the parent page <sp> */
 821                 XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
 822                 if (rc) {
 823                         XT_PUTPAGE(rcmp);
 824                         return rc;
 825                 }
 826 
 827                 /*
 828                  * The new key entry goes ONE AFTER the index of parent entry,
 829                  * because the split was to the right.
 830                  */
 831                 skip = parent->index + 1;
 832 
 833                 /*
 834                  * split or shift right remaining entries of the parent page
 835                  */
 836                 nextindex = le16_to_cpu(sp->header.nextindex);
 837                 /*
 838                  * parent page is full - split the parent page
 839                  */
 840                 if (nextindex == le16_to_cpu(sp->header.maxentry)) {
 841                         /* init for parent page split */
 842                         split->mp = smp;
 843                         split->index = skip;    /* index at insert */
 844                         split->flag = XAD_NEW;
 845                         split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
 846                         split->len = JFS_SBI(ip->i_sb)->nbperpage;
 847                         split->addr = rcbn;
 848 
 849                         /* unpin previous right child page */
 850                         XT_PUTPAGE(rcmp);
 851 
 852                         /* The split routines insert the new entry,
 853                          * and acquire txLock as appropriate.
 854                          * return <rp> pinned and its block number <rpbn>.
 855                          */
 856                         rc = (sp->header.flag & BT_ROOT) ?
 857                             xtSplitRoot(tid, ip, split, &rmp) :
 858                             xtSplitPage(tid, ip, split, &rmp, &rbn);
 859                         if (rc) {
 860                                 XT_PUTPAGE(smp);
 861                                 return rc;
 862                         }
 863 
 864                         XT_PUTPAGE(smp);
 865                         /* keep new child page <rp> pinned */
 866                 }
 867                 /*
 868                  * parent page is not full - insert in parent page
 869                  */
 870                 else {
 871                         /*
 872                          * insert router entry in parent for the right child
 873                          * page from the first entry of the right child page:
 874                          */
 875                         /*
 876                          * acquire a transaction lock on the parent page;
 877                          *
 878                          * action: router xad insertion;
 879                          */
 880                         BT_MARK_DIRTY(smp, ip);
 881 
 882                         /*
 883                          * if insert into middle, shift right remaining entries
 884                          */
 885                         if (skip < nextindex)
 886                                 memmove(&sp->xad[skip + 1], &sp->xad[skip],
 887                                         (nextindex -
 888                                          skip) << L2XTSLOTSIZE);
 889 
 890                         /* insert the router entry */
 891                         xad = &sp->xad[skip];
 892                         XT_PUTENTRY(xad, XAD_NEW,
 893                                     offsetXAD(&rcp->xad[XTENTRYSTART]),
 894                                     JFS_SBI(ip->i_sb)->nbperpage, rcbn);
 895 
 896                         /* advance next available entry index. */
 897                         le16_add_cpu(&sp->header.nextindex, 1);
 898 
 899                         /* Don't log it if there are no links to the file */
 900                         if (!test_cflag(COMMIT_Nolink, ip)) {
 901                                 tlck = txLock(tid, ip, smp,
 902                                               tlckXTREE | tlckGROW);
 903                                 xtlck = (struct xtlock *) & tlck->lock;
 904                                 xtlck->lwm.offset = (xtlck->lwm.offset) ?
 905                                     min(skip, (int)xtlck->lwm.offset) : skip;
 906                                 xtlck->lwm.length =
 907                                     le16_to_cpu(sp->header.nextindex) -
 908                                     xtlck->lwm.offset;
 909                         }
 910 
 911                         /* unpin parent page */
 912                         XT_PUTPAGE(smp);
 913 
 914                         /* exit propagate up */
 915                         break;
 916                 }
 917         }
 918 
 919         /* unpin current right page */
 920         XT_PUTPAGE(rmp);
 921 
 922         return 0;
 923 }
 924 
 925 
 926 /*
 927  *      xtSplitPage()
 928  *
 929  * function:
 930  *      split a full non-root page into
 931  *      original/split/left page and new right page
 932  *      i.e., the original/split page remains as left page.
 933  *
 934  * parameter:
 935  *      int             tid,
 936  *      struct inode    *ip,
 937  *      struct xtsplit  *split,
 938  *      struct metapage **rmpp,
 939  *      u64             *rbnp,
 940  *
 941  * return:
 942  *      Pointer to page in which to insert or NULL on error.
 943  */
 944 static int
 945 xtSplitPage(tid_t tid, struct inode *ip,
 946             struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
 947 {
 948         int rc = 0;
 949         struct metapage *smp;
 950         xtpage_t *sp;
 951         struct metapage *rmp;
 952         xtpage_t *rp;           /* new right page allocated */
 953         s64 rbn;                /* new right page block number */
 954         struct metapage *mp;
 955         xtpage_t *p;
 956         s64 nextbn;
 957         int skip, maxentry, middle, righthalf, n;
 958         xad_t *xad;
 959         struct pxdlist *pxdlist;
 960         pxd_t *pxd;
 961         struct tlock *tlck;
 962         struct xtlock *sxtlck = NULL, *rxtlck = NULL;
 963         int quota_allocation = 0;
 964 
 965         smp = split->mp;
 966         sp = XT_PAGE(ip, smp);
 967 
 968         INCREMENT(xtStat.split);
 969 
 970         pxdlist = split->pxdlist;
 971         pxd = &pxdlist->pxd[pxdlist->npxd];
 972         pxdlist->npxd++;
 973         rbn = addressPXD(pxd);
 974 
 975         /* Allocate blocks to quota. */
 976         rc = dquot_alloc_block(ip, lengthPXD(pxd));
 977         if (rc)
 978                 goto clean_up;
 979 
 980         quota_allocation += lengthPXD(pxd);
 981 
 982         /*
 983          * allocate the new right page for the split
 984          */
 985         rmp = get_metapage(ip, rbn, PSIZE, 1);
 986         if (rmp == NULL) {
 987                 rc = -EIO;
 988                 goto clean_up;
 989         }
 990 
 991         jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
 992 
 993         BT_MARK_DIRTY(rmp, ip);
 994         /*
 995          * action: new page;
 996          */
 997 
 998         rp = (xtpage_t *) rmp->data;
 999         rp->header.self = *pxd;
1000         rp->header.flag = sp->header.flag & BT_TYPE;
1001         rp->header.maxentry = sp->header.maxentry;      /* little-endian */
1002         rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1003 
1004         BT_MARK_DIRTY(smp, ip);
1005         /* Don't log it if there are no links to the file */
1006         if (!test_cflag(COMMIT_Nolink, ip)) {
1007                 /*
1008                  * acquire a transaction lock on the new right page;
1009                  */
1010                 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1011                 rxtlck = (struct xtlock *) & tlck->lock;
1012                 rxtlck->lwm.offset = XTENTRYSTART;
1013                 /*
1014                  * acquire a transaction lock on the split page
1015                  */
1016                 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
1017                 sxtlck = (struct xtlock *) & tlck->lock;
1018         }
1019 
1020         /*
1021          * initialize/update sibling pointers of <sp> and <rp>
1022          */
1023         nextbn = le64_to_cpu(sp->header.next);
1024         rp->header.next = cpu_to_le64(nextbn);
1025         rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1026         sp->header.next = cpu_to_le64(rbn);
1027 
1028         skip = split->index;
1029 
1030         /*
1031          *      sequential append at tail (after last entry of last page)
1032          *
1033          * if splitting the last page on a level because of appending
1034          * a entry to it (skip is maxentry), it's likely that the access is
1035          * sequential. adding an empty page on the side of the level is less
1036          * work and can push the fill factor much higher than normal.
1037          * if we're wrong it's no big deal -  we will do the split the right
1038          * way next time.
1039          * (it may look like it's equally easy to do a similar hack for
1040          * reverse sorted data, that is, split the tree left, but it's not.
1041          * Be my guest.)
1042          */
1043         if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
1044                 /*
1045                  * acquire a transaction lock on the new/right page;
1046                  *
1047                  * action: xad insertion;
1048                  */
1049                 /* insert entry at the first entry of the new right page */
1050                 xad = &rp->xad[XTENTRYSTART];
1051                 XT_PUTENTRY(xad, split->flag, split->off, split->len,
1052                             split->addr);
1053 
1054                 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1055 
1056                 if (!test_cflag(COMMIT_Nolink, ip)) {
1057                         /* rxtlck->lwm.offset = XTENTRYSTART; */
1058                         rxtlck->lwm.length = 1;
1059                 }
1060 
1061                 *rmpp = rmp;
1062                 *rbnp = rbn;
1063 
1064                 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1065                 return 0;
1066         }
1067 
1068         /*
1069          *      non-sequential insert (at possibly middle page)
1070          */
1071 
1072         /*
1073          * update previous pointer of old next/right page of <sp>
1074          */
1075         if (nextbn != 0) {
1076                 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1077                 if (rc) {
1078                         XT_PUTPAGE(rmp);
1079                         goto clean_up;
1080                 }
1081 
1082                 BT_MARK_DIRTY(mp, ip);
1083                 /*
1084                  * acquire a transaction lock on the next page;
1085                  *
1086                  * action:sibling pointer update;
1087                  */
1088                 if (!test_cflag(COMMIT_Nolink, ip))
1089                         tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
1090 
1091                 p->header.prev = cpu_to_le64(rbn);
1092 
1093                 /* sibling page may have been updated previously, or
1094                  * it may be updated later;
1095                  */
1096 
1097                 XT_PUTPAGE(mp);
1098         }
1099 
1100         /*
1101          * split the data between the split and new/right pages
1102          */
1103         maxentry = le16_to_cpu(sp->header.maxentry);
1104         middle = maxentry >> 1;
1105         righthalf = maxentry - middle;
1106 
1107         /*
1108          * skip index in old split/left page - insert into left page:
1109          */
1110         if (skip <= middle) {
1111                 /* move right half of split page to the new right page */
1112                 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1113                         righthalf << L2XTSLOTSIZE);
1114 
1115                 /* shift right tail of left half to make room for new entry */
1116                 if (skip < middle)
1117                         memmove(&sp->xad[skip + 1], &sp->xad[skip],
1118                                 (middle - skip) << L2XTSLOTSIZE);
1119 
1120                 /* insert new entry */
1121                 xad = &sp->xad[skip];
1122                 XT_PUTENTRY(xad, split->flag, split->off, split->len,
1123                             split->addr);
1124 
1125                 /* update page header */
1126                 sp->header.nextindex = cpu_to_le16(middle + 1);
1127                 if (!test_cflag(COMMIT_Nolink, ip)) {
1128                         sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1129                             min(skip, (int)sxtlck->lwm.offset) : skip;
1130                 }
1131 
1132                 rp->header.nextindex =
1133                     cpu_to_le16(XTENTRYSTART + righthalf);
1134         }
1135         /*
1136          * skip index in new right page - insert into right page:
1137          */
1138         else {
1139                 /* move left head of right half to right page */
1140                 n = skip - middle;
1141                 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1142                         n << L2XTSLOTSIZE);
1143 
1144                 /* insert new entry */
1145                 n += XTENTRYSTART;
1146                 xad = &rp->xad[n];
1147                 XT_PUTENTRY(xad, split->flag, split->off, split->len,
1148                             split->addr);
1149 
1150                 /* move right tail of right half to right page */
1151                 if (skip < maxentry)
1152                         memmove(&rp->xad[n + 1], &sp->xad[skip],
1153                                 (maxentry - skip) << L2XTSLOTSIZE);
1154 
1155                 /* update page header */
1156                 sp->header.nextindex = cpu_to_le16(middle);
1157                 if (!test_cflag(COMMIT_Nolink, ip)) {
1158                         sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1159                             min(middle, (int)sxtlck->lwm.offset) : middle;
1160                 }
1161 
1162                 rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
1163                                                    righthalf + 1);
1164         }
1165 
1166         if (!test_cflag(COMMIT_Nolink, ip)) {
1167                 sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
1168                     sxtlck->lwm.offset;
1169 
1170                 /* rxtlck->lwm.offset = XTENTRYSTART; */
1171                 rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1172                     XTENTRYSTART;
1173         }
1174 
1175         *rmpp = rmp;
1176         *rbnp = rbn;
1177 
1178         jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1179         return rc;
1180 
1181       clean_up:
1182 
1183         /* Rollback quota allocation. */
1184         if (quota_allocation)
1185                 dquot_free_block(ip, quota_allocation);
1186 
1187         return (rc);
1188 }
1189 
1190 
1191 /*
1192  *      xtSplitRoot()
1193  *
1194  * function:
1195  *      split the full root page into original/root/split page and new
1196  *      right page
1197  *      i.e., root remains fixed in tree anchor (inode) and the root is
1198  *      copied to a single new right child page since root page <<
1199  *      non-root page, and the split root page contains a single entry
1200  *      for the new right child page.
1201  *
1202  * parameter:
1203  *      int             tid,
1204  *      struct inode    *ip,
1205  *      struct xtsplit  *split,
1206  *      struct metapage **rmpp)
1207  *
1208  * return:
1209  *      Pointer to page in which to insert or NULL on error.
1210  */
1211 static int
1212 xtSplitRoot(tid_t tid,
1213             struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
1214 {
1215         xtpage_t *sp;
1216         struct metapage *rmp;
1217         xtpage_t *rp;
1218         s64 rbn;
1219         int skip, nextindex;
1220         xad_t *xad;
1221         pxd_t *pxd;
1222         struct pxdlist *pxdlist;
1223         struct tlock *tlck;
1224         struct xtlock *xtlck;
1225         int rc;
1226 
1227         sp = &JFS_IP(ip)->i_xtroot;
1228 
1229         INCREMENT(xtStat.split);
1230 
1231         /*
1232          *      allocate a single (right) child page
1233          */
1234         pxdlist = split->pxdlist;
1235         pxd = &pxdlist->pxd[pxdlist->npxd];
1236         pxdlist->npxd++;
1237         rbn = addressPXD(pxd);
1238         rmp = get_metapage(ip, rbn, PSIZE, 1);
1239         if (rmp == NULL)
1240                 return -EIO;
1241 
1242         /* Allocate blocks to quota. */
1243         rc = dquot_alloc_block(ip, lengthPXD(pxd));
1244         if (rc) {
1245                 release_metapage(rmp);
1246                 return rc;
1247         }
1248 
1249         jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
1250 
1251         /*
1252          * acquire a transaction lock on the new right page;
1253          *
1254          * action: new page;
1255          */
1256         BT_MARK_DIRTY(rmp, ip);
1257 
1258         rp = (xtpage_t *) rmp->data;
1259         rp->header.flag =
1260             (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1261         rp->header.self = *pxd;
1262         rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1263         rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);
1264 
1265         /* initialize sibling pointers */
1266         rp->header.next = 0;
1267         rp->header.prev = 0;
1268 
1269         /*
1270          * copy the in-line root page into new right page extent
1271          */
1272         nextindex = le16_to_cpu(sp->header.maxentry);
1273         memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
1274                 (nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
1275 
1276         /*
1277          * insert the new entry into the new right/child page
1278          * (skip index in the new right page will not change)
1279          */
1280         skip = split->index;
1281         /* if insert into middle, shift right remaining entries */
1282         if (skip != nextindex)
1283                 memmove(&rp->xad[skip + 1], &rp->xad[skip],
1284                         (nextindex - skip) * sizeof(xad_t));
1285 
1286         xad = &rp->xad[skip];
1287         XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);
1288 
1289         /* update page header */
1290         rp->header.nextindex = cpu_to_le16(nextindex + 1);
1291 
1292         if (!test_cflag(COMMIT_Nolink, ip)) {
1293                 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1294                 xtlck = (struct xtlock *) & tlck->lock;
1295                 xtlck->lwm.offset = XTENTRYSTART;
1296                 xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1297                     XTENTRYSTART;
1298         }
1299 
1300         /*
1301          *      reset the root
1302          *
1303          * init root with the single entry for the new right page
1304          * set the 1st entry offset to 0, which force the left-most key
1305          * at any level of the tree to be less than any search key.
1306          */
1307         /*
1308          * acquire a transaction lock on the root page (in-memory inode);
1309          *
1310          * action: root split;
1311          */
1312         BT_MARK_DIRTY(split->mp, ip);
1313 
1314         xad = &sp->xad[XTENTRYSTART];
1315         XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);
1316 
1317         /* update page header of root */
1318         sp->header.flag &= ~BT_LEAF;
1319         sp->header.flag |= BT_INTERNAL;
1320 
1321         sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1322 
1323         if (!test_cflag(COMMIT_Nolink, ip)) {
1324                 tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
1325                 xtlck = (struct xtlock *) & tlck->lock;
1326                 xtlck->lwm.offset = XTENTRYSTART;
1327                 xtlck->lwm.length = 1;
1328         }
1329 
1330         *rmpp = rmp;
1331 
1332         jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
1333         return 0;
1334 }
1335 
1336 
1337 /*
1338  *      xtExtend()
1339  *
1340  * function: extend in-place;
1341  *
1342  * note: existing extent may or may not have been committed.
1343  * caller is responsible for pager buffer cache update, and
1344  * working block allocation map update;
1345  * update pmap: alloc whole extended extent;
1346  */
1347 int xtExtend(tid_t tid,         /* transaction id */
1348              struct inode *ip, s64 xoff,        /* delta extent offset */
1349              s32 xlen,          /* delta extent length */
1350              int flag)
1351 {
1352         int rc = 0;
1353         int cmp;
1354         struct metapage *mp;    /* meta-page buffer */
1355         xtpage_t *p;            /* base B+-tree index page */
1356         s64 bn;
1357         int index, nextindex, len;
1358         struct btstack btstack; /* traverse stack */
1359         struct xtsplit split;   /* split information */
1360         xad_t *xad;
1361         s64 xaddr;
1362         struct tlock *tlck;
1363         struct xtlock *xtlck = NULL;
1364 
1365         jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
1366 
1367         /* there must exist extent to be extended */
1368         if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
1369                 return rc;
1370 
1371         /* retrieve search result */
1372         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1373 
1374         if (cmp != 0) {
1375                 XT_PUTPAGE(mp);
1376                 jfs_error(ip->i_sb, "xtSearch did not find extent\n");
1377                 return -EIO;
1378         }
1379 
1380         /* extension must be contiguous */
1381         xad = &p->xad[index];
1382         if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
1383                 XT_PUTPAGE(mp);
1384                 jfs_error(ip->i_sb, "extension is not contiguous\n");
1385                 return -EIO;
1386         }
1387 
1388         /*
1389          * acquire a transaction lock on the leaf page;
1390          *
1391          * action: xad insertion/extension;
1392          */
1393         BT_MARK_DIRTY(mp, ip);
1394         if (!test_cflag(COMMIT_Nolink, ip)) {
1395                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1396                 xtlck = (struct xtlock *) & tlck->lock;
1397         }
1398 
1399         /* extend will overflow extent ? */
1400         xlen = lengthXAD(xad) + xlen;
1401         if ((len = xlen - MAXXLEN) <= 0)
1402                 goto extendOld;
1403 
1404         /*
1405          *      extent overflow: insert entry for new extent
1406          */
1407 //insertNew:
1408         xoff = offsetXAD(xad) + MAXXLEN;
1409         xaddr = addressXAD(xad) + MAXXLEN;
1410         nextindex = le16_to_cpu(p->header.nextindex);
1411 
1412         /*
1413          *      if the leaf page is full, insert the new entry and
1414          *      propagate up the router entry for the new page from split
1415          *
1416          * The xtSplitUp() will insert the entry and unpin the leaf page.
1417          */
1418         if (nextindex == le16_to_cpu(p->header.maxentry)) {
1419                 /* xtSpliUp() unpins leaf pages */
1420                 split.mp = mp;
1421                 split.index = index + 1;
1422                 split.flag = XAD_NEW;
1423                 split.off = xoff;       /* split offset */
1424                 split.len = len;
1425                 split.addr = xaddr;
1426                 split.pxdlist = NULL;
1427                 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1428                         return rc;
1429 
1430                 /* get back old page */
1431                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1432                 if (rc)
1433                         return rc;
1434                 /*
1435                  * if leaf root has been split, original root has been
1436                  * copied to new child page, i.e., original entry now
1437                  * resides on the new child page;
1438                  */
1439                 if (p->header.flag & BT_INTERNAL) {
1440                         ASSERT(p->header.nextindex ==
1441                                cpu_to_le16(XTENTRYSTART + 1));
1442                         xad = &p->xad[XTENTRYSTART];
1443                         bn = addressXAD(xad);
1444                         XT_PUTPAGE(mp);
1445 
1446                         /* get new child page */
1447                         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1448                         if (rc)
1449                                 return rc;
1450 
1451                         BT_MARK_DIRTY(mp, ip);
1452                         if (!test_cflag(COMMIT_Nolink, ip)) {
1453                                 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1454                                 xtlck = (struct xtlock *) & tlck->lock;
1455                         }
1456                 }
1457         }
1458         /*
1459          *      insert the new entry into the leaf page
1460          */
1461         else {
1462                 /* insert the new entry: mark the entry NEW */
1463                 xad = &p->xad[index + 1];
1464                 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
1465 
1466                 /* advance next available entry index */
1467                 le16_add_cpu(&p->header.nextindex, 1);
1468         }
1469 
1470         /* get back old entry */
1471         xad = &p->xad[index];
1472         xlen = MAXXLEN;
1473 
1474         /*
1475          * extend old extent
1476          */
1477       extendOld:
1478         XADlength(xad, xlen);
1479         if (!(xad->flag & XAD_NEW))
1480                 xad->flag |= XAD_EXTENDED;
1481 
1482         if (!test_cflag(COMMIT_Nolink, ip)) {
1483                 xtlck->lwm.offset =
1484                     (xtlck->lwm.offset) ? min(index,
1485                                               (int)xtlck->lwm.offset) : index;
1486                 xtlck->lwm.length =
1487                     le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
1488         }
1489 
1490         /* unpin the leaf page */
1491         XT_PUTPAGE(mp);
1492 
1493         return rc;
1494 }
1495 
1496 #ifdef _NOTYET
1497 /*
1498  *      xtTailgate()
1499  *
1500  * function: split existing 'tail' extent
1501  *      (split offset >= start offset of tail extent), and
1502  *      relocate and extend the split tail half;
1503  *
1504  * note: existing extent may or may not have been committed.
1505  * caller is responsible for pager buffer cache update, and
1506  * working block allocation map update;
1507  * update pmap: free old split tail extent, alloc new extent;
1508  */
1509 int xtTailgate(tid_t tid,               /* transaction id */
1510                struct inode *ip, s64 xoff,      /* split/new extent offset */
1511                s32 xlen,        /* new extent length */
1512                s64 xaddr,       /* new extent address */
1513                int flag)
1514 {
1515         int rc = 0;
1516         int cmp;
1517         struct metapage *mp;    /* meta-page buffer */
1518         xtpage_t *p;            /* base B+-tree index page */
1519         s64 bn;
1520         int index, nextindex, llen, rlen;
1521         struct btstack btstack; /* traverse stack */
1522         struct xtsplit split;   /* split information */
1523         xad_t *xad;
1524         struct tlock *tlck;
1525         struct xtlock *xtlck = 0;
1526         struct tlock *mtlck;
1527         struct maplock *pxdlock;
1528 
1529 /*
1530 printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
1531         (ulong)xoff, xlen, (ulong)xaddr);
1532 */
1533 
1534         /* there must exist extent to be tailgated */
1535         if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
1536                 return rc;
1537 
1538         /* retrieve search result */
1539         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
1540 
1541         if (cmp != 0) {
1542                 XT_PUTPAGE(mp);
1543                 jfs_error(ip->i_sb, "couldn't find extent\n");
1544                 return -EIO;
1545         }
1546 
1547         /* entry found must be last entry */
1548         nextindex = le16_to_cpu(p->header.nextindex);
1549         if (index != nextindex - 1) {
1550                 XT_PUTPAGE(mp);
1551                 jfs_error(ip->i_sb, "the entry found is not the last entry\n");
1552                 return -EIO;
1553         }
1554 
1555         BT_MARK_DIRTY(mp, ip);
1556         /*
1557          * acquire tlock of the leaf page containing original entry
1558          */
1559         if (!test_cflag(COMMIT_Nolink, ip)) {
1560                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1561                 xtlck = (struct xtlock *) & tlck->lock;
1562         }
1563 
1564         /* completely replace extent ? */
1565         xad = &p->xad[index];
1566 /*
1567 printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1568         (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
1569 */
1570         if ((llen = xoff - offsetXAD(xad)) == 0)
1571                 goto updateOld;
1572 
1573         /*
1574          *      partially replace extent: insert entry for new extent
1575          */
1576 //insertNew:
1577         /*
1578          *      if the leaf page is full, insert the new entry and
1579          *      propagate up the router entry for the new page from split
1580          *
1581          * The xtSplitUp() will insert the entry and unpin the leaf page.
1582          */
1583         if (nextindex == le16_to_cpu(p->header.maxentry)) {
1584                 /* xtSpliUp() unpins leaf pages */
1585                 split.mp = mp;
1586                 split.index = index + 1;
1587                 split.flag = XAD_NEW;
1588                 split.off = xoff;       /* split offset */
1589                 split.len = xlen;
1590                 split.addr = xaddr;
1591                 split.pxdlist = NULL;
1592                 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1593                         return rc;
1594 
1595                 /* get back old page */
1596                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1597                 if (rc)
1598                         return rc;
1599                 /*
1600                  * if leaf root has been split, original root has been
1601                  * copied to new child page, i.e., original entry now
1602                  * resides on the new child page;
1603                  */
1604                 if (p->header.flag & BT_INTERNAL) {
1605                         ASSERT(p->header.nextindex ==
1606                                cpu_to_le16(XTENTRYSTART + 1));
1607                         xad = &p->xad[XTENTRYSTART];
1608                         bn = addressXAD(xad);
1609                         XT_PUTPAGE(mp);
1610 
1611                         /* get new child page */
1612                         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1613                         if (rc)
1614                                 return rc;
1615 
1616                         BT_MARK_DIRTY(mp, ip);
1617                         if (!test_cflag(COMMIT_Nolink, ip)) {
1618                                 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1619                                 xtlck = (struct xtlock *) & tlck->lock;
1620                         }
1621                 }
1622         }
1623         /*
1624          *      insert the new entry into the leaf page
1625          */
1626         else {
1627                 /* insert the new entry: mark the entry NEW */
1628                 xad = &p->xad[index + 1];
1629                 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
1630 
1631                 /* advance next available entry index */
1632                 le16_add_cpu(&p->header.nextindex, 1);
1633         }
1634 
1635         /* get back old XAD */
1636         xad = &p->xad[index];
1637 
1638         /*
1639          * truncate/relocate old extent at split offset
1640          */
1641       updateOld:
1642         /* update dmap for old/committed/truncated extent */
1643         rlen = lengthXAD(xad) - llen;
1644         if (!(xad->flag & XAD_NEW)) {
1645                 /* free from PWMAP at commit */
1646                 if (!test_cflag(COMMIT_Nolink, ip)) {
1647                         mtlck = txMaplock(tid, ip, tlckMAP);
1648                         pxdlock = (struct maplock *) & mtlck->lock;
1649                         pxdlock->flag = mlckFREEPXD;
1650                         PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
1651                         PXDlength(&pxdlock->pxd, rlen);
1652                         pxdlock->index = 1;
1653                 }
1654         } else
1655                 /* free from WMAP */
1656                 dbFree(ip, addressXAD(xad) + llen, (s64) rlen);
1657 
1658         if (llen)
1659                 /* truncate */
1660                 XADlength(xad, llen);
1661         else
1662                 /* replace */
1663                 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
1664 
1665         if (!test_cflag(COMMIT_Nolink, ip)) {
1666                 xtlck->lwm.offset = (xtlck->lwm.offset) ?
1667                     min(index, (int)xtlck->lwm.offset) : index;
1668                 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
1669                     xtlck->lwm.offset;
1670         }
1671 
1672         /* unpin the leaf page */
1673         XT_PUTPAGE(mp);
1674 
1675         return rc;
1676 }
1677 #endif /* _NOTYET */
1678 
1679 /*
1680  *      xtUpdate()
1681  *
1682  * function: update XAD;
1683  *
1684  *      update extent for allocated_but_not_recorded or
1685  *      compressed extent;
1686  *
1687  * parameter:
1688  *      nxad    - new XAD;
1689  *              logical extent of the specified XAD must be completely
1690  *              contained by an existing XAD;
1691  */
1692 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1693 {                               /* new XAD */
1694         int rc = 0;
1695         int cmp;
1696         struct metapage *mp;    /* meta-page buffer */
1697         xtpage_t *p;            /* base B+-tree index page */
1698         s64 bn;
1699         int index0, index, newindex, nextindex;
1700         struct btstack btstack; /* traverse stack */
1701         struct xtsplit split;   /* split information */
1702         xad_t *xad, *lxad, *rxad;
1703         int xflag;
1704         s64 nxoff, xoff;
1705         int nxlen, xlen, lxlen, rxlen;
1706         s64 nxaddr, xaddr;
1707         struct tlock *tlck;
1708         struct xtlock *xtlck = NULL;
1709         int newpage = 0;
1710 
1711         /* there must exist extent to be tailgated */
1712         nxoff = offsetXAD(nxad);
1713         nxlen = lengthXAD(nxad);
1714         nxaddr = addressXAD(nxad);
1715 
1716         if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1717                 return rc;
1718 
1719         /* retrieve search result */
1720         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1721 
1722         if (cmp != 0) {
1723                 XT_PUTPAGE(mp);
1724                 jfs_error(ip->i_sb, "Could not find extent\n");
1725                 return -EIO;
1726         }
1727 
1728         BT_MARK_DIRTY(mp, ip);
1729         /*
1730          * acquire tlock of the leaf page containing original entry
1731          */
1732         if (!test_cflag(COMMIT_Nolink, ip)) {
1733                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1734                 xtlck = (struct xtlock *) & tlck->lock;
1735         }
1736 
1737         xad = &p->xad[index0];
1738         xflag = xad->flag;
1739         xoff = offsetXAD(xad);
1740         xlen = lengthXAD(xad);
1741         xaddr = addressXAD(xad);
1742 
1743         /* nXAD must be completely contained within XAD */
1744         if ((xoff > nxoff) ||
1745             (nxoff + nxlen > xoff + xlen)) {
1746                 XT_PUTPAGE(mp);
1747                 jfs_error(ip->i_sb,
1748                           "nXAD in not completely contained within XAD\n");
1749                 return -EIO;
1750         }
1751 
1752         index = index0;
1753         newindex = index + 1;
1754         nextindex = le16_to_cpu(p->header.nextindex);
1755 
1756 #ifdef  _JFS_WIP_NOCOALESCE
1757         if (xoff < nxoff)
1758                 goto updateRight;
1759 
1760         /*
1761          * replace XAD with nXAD
1762          */
1763       replace:                  /* (nxoff == xoff) */
1764         if (nxlen == xlen) {
1765                 /* replace XAD with nXAD:recorded */
1766                 *xad = *nxad;
1767                 xad->flag = xflag & ~XAD_NOTRECORDED;
1768 
1769                 goto out;
1770         } else                  /* (nxlen < xlen) */
1771                 goto updateLeft;
1772 #endif                          /* _JFS_WIP_NOCOALESCE */
1773 
1774 /* #ifdef _JFS_WIP_COALESCE */
1775         if (xoff < nxoff)
1776                 goto coalesceRight;
1777 
1778         /*
1779          * coalesce with left XAD
1780          */
1781 //coalesceLeft: /* (xoff == nxoff) */
1782         /* is XAD first entry of page ? */
1783         if (index == XTENTRYSTART)
1784                 goto replace;
1785 
1786         /* is nXAD logically and physically contiguous with lXAD ? */
1787         lxad = &p->xad[index - 1];
1788         lxlen = lengthXAD(lxad);
1789         if (!(lxad->flag & XAD_NOTRECORDED) &&
1790             (nxoff == offsetXAD(lxad) + lxlen) &&
1791             (nxaddr == addressXAD(lxad) + lxlen) &&
1792             (lxlen + nxlen < MAXXLEN)) {
1793                 /* extend right lXAD */
1794                 index0 = index - 1;
1795                 XADlength(lxad, lxlen + nxlen);
1796 
1797                 /* If we just merged two extents together, need to make sure the
1798                  * right extent gets logged.  If the left one is marked XAD_NEW,
1799                  * then we know it will be logged.  Otherwise, mark as
1800                  * XAD_EXTENDED
1801                  */
1802                 if (!(lxad->flag & XAD_NEW))
1803                         lxad->flag |= XAD_EXTENDED;
1804 
1805                 if (xlen > nxlen) {
1806                         /* truncate XAD */
1807                         XADoffset(xad, xoff + nxlen);
1808                         XADlength(xad, xlen - nxlen);
1809                         XADaddress(xad, xaddr + nxlen);
1810                         goto out;
1811                 } else {        /* (xlen == nxlen) */
1812 
1813                         /* remove XAD */
1814                         if (index < nextindex - 1)
1815                                 memmove(&p->xad[index], &p->xad[index + 1],
1816                                         (nextindex - index -
1817                                          1) << L2XTSLOTSIZE);
1818 
1819                         p->header.nextindex =
1820                             cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1821                                         1);
1822 
1823                         index = index0;
1824                         newindex = index + 1;
1825                         nextindex = le16_to_cpu(p->header.nextindex);
1826                         xoff = nxoff = offsetXAD(lxad);
1827                         xlen = nxlen = lxlen + nxlen;
1828                         xaddr = nxaddr = addressXAD(lxad);
1829                         goto coalesceRight;
1830                 }
1831         }
1832 
1833         /*
1834          * replace XAD with nXAD
1835          */
1836       replace:                  /* (nxoff == xoff) */
1837         if (nxlen == xlen) {
1838                 /* replace XAD with nXAD:recorded */
1839                 *xad = *nxad;
1840                 xad->flag = xflag & ~XAD_NOTRECORDED;
1841 
1842                 goto coalesceRight;
1843         } else                  /* (nxlen < xlen) */
1844                 goto updateLeft;
1845 
1846         /*
1847          * coalesce with right XAD
1848          */
1849       coalesceRight:            /* (xoff <= nxoff) */
1850         /* is XAD last entry of page ? */
1851         if (newindex == nextindex) {
1852                 if (xoff == nxoff)
1853                         goto out;
1854                 goto updateRight;
1855         }
1856 
1857         /* is nXAD logically and physically contiguous with rXAD ? */
1858         rxad = &p->xad[index + 1];
1859         rxlen = lengthXAD(rxad);
1860         if (!(rxad->flag & XAD_NOTRECORDED) &&
1861             (nxoff + nxlen == offsetXAD(rxad)) &&
1862             (nxaddr + nxlen == addressXAD(rxad)) &&
1863             (rxlen + nxlen < MAXXLEN)) {
1864                 /* extend left rXAD */
1865                 XADoffset(rxad, nxoff);
1866                 XADlength(rxad, rxlen + nxlen);
1867                 XADaddress(rxad, nxaddr);
1868 
1869                 /* If we just merged two extents together, need to make sure
1870                  * the left extent gets logged.  If the right one is marked
1871                  * XAD_NEW, then we know it will be logged.  Otherwise, mark as
1872                  * XAD_EXTENDED
1873                  */
1874                 if (!(rxad->flag & XAD_NEW))
1875                         rxad->flag |= XAD_EXTENDED;
1876 
1877                 if (xlen > nxlen)
1878                         /* truncate XAD */
1879                         XADlength(xad, xlen - nxlen);
1880                 else {          /* (xlen == nxlen) */
1881 
1882                         /* remove XAD */
1883                         memmove(&p->xad[index], &p->xad[index + 1],
1884                                 (nextindex - index - 1) << L2XTSLOTSIZE);
1885 
1886                         p->header.nextindex =
1887                             cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1888                                         1);
1889                 }
1890 
1891                 goto out;
1892         } else if (xoff == nxoff)
1893                 goto out;
1894 
1895         if (xoff >= nxoff) {
1896                 XT_PUTPAGE(mp);
1897                 jfs_error(ip->i_sb, "xoff >= nxoff\n");
1898                 return -EIO;
1899         }
1900 /* #endif _JFS_WIP_COALESCE */
1901 
1902         /*
1903          * split XAD into (lXAD, nXAD):
1904          *
1905          *          |---nXAD--->
1906          * --|----------XAD----------|--
1907          *   |-lXAD-|
1908          */
1909       updateRight:              /* (xoff < nxoff) */
1910         /* truncate old XAD as lXAD:not_recorded */
1911         xad = &p->xad[index];
1912         XADlength(xad, nxoff - xoff);
1913 
1914         /* insert nXAD:recorded */
1915         if (nextindex == le16_to_cpu(p->header.maxentry)) {
1916 
1917                 /* xtSpliUp() unpins leaf pages */
1918                 split.mp = mp;
1919                 split.index = newindex;
1920                 split.flag = xflag & ~XAD_NOTRECORDED;
1921                 split.off = nxoff;
1922                 split.len = nxlen;
1923                 split.addr = nxaddr;
1924                 split.pxdlist = NULL;
1925                 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1926                         return rc;
1927 
1928                 /* get back old page */
1929                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1930                 if (rc)
1931                         return rc;
1932                 /*
1933                  * if leaf root has been split, original root has been
1934                  * copied to new child page, i.e., original entry now
1935                  * resides on the new child page;
1936                  */
1937                 if (p->header.flag & BT_INTERNAL) {
1938                         ASSERT(p->header.nextindex ==
1939                                cpu_to_le16(XTENTRYSTART + 1));
1940                         xad = &p->xad[XTENTRYSTART];
1941                         bn = addressXAD(xad);
1942                         XT_PUTPAGE(mp);
1943 
1944                         /* get new child page */
1945                         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
1946                         if (rc)
1947                                 return rc;
1948 
1949                         BT_MARK_DIRTY(mp, ip);
1950                         if (!test_cflag(COMMIT_Nolink, ip)) {
1951                                 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1952                                 xtlck = (struct xtlock *) & tlck->lock;
1953                         }
1954                 } else {
1955                         /* is nXAD on new page ? */
1956                         if (newindex >
1957                             (le16_to_cpu(p->header.maxentry) >> 1)) {
1958                                 newindex =
1959                                     newindex -
1960                                     le16_to_cpu(p->header.nextindex) +
1961                                     XTENTRYSTART;
1962                                 newpage = 1;
1963                         }
1964                 }
1965         } else {
1966                 /* if insert into middle, shift right remaining entries */
1967                 if (newindex < nextindex)
1968                         memmove(&p->xad[newindex + 1], &p->xad[newindex],
1969                                 (nextindex - newindex) << L2XTSLOTSIZE);
1970 
1971                 /* insert the entry */
1972                 xad = &p->xad[newindex];
1973                 *xad = *nxad;
1974                 xad->flag = xflag & ~XAD_NOTRECORDED;
1975 
1976                 /* advance next available entry index. */
1977                 p->header.nextindex =
1978                     cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1979         }
1980 
1981         /*
1982          * does nXAD force 3-way split ?
1983          *
1984          *          |---nXAD--->|
1985          * --|----------XAD-------------|--
1986          *   |-lXAD-|           |-rXAD -|
1987          */
1988         if (nxoff + nxlen == xoff + xlen)
1989                 goto out;
1990 
1991         /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
1992         if (newpage) {
1993                 /* close out old page */
1994                 if (!test_cflag(COMMIT_Nolink, ip)) {
1995                         xtlck->lwm.offset = (xtlck->lwm.offset) ?
1996                             min(index0, (int)xtlck->lwm.offset) : index0;
1997                         xtlck->lwm.length =
1998                             le16_to_cpu(p->header.nextindex) -
1999                             xtlck->lwm.offset;
2000                 }
2001 
2002                 bn = le64_to_cpu(p->header.next);
2003                 XT_PUTPAGE(mp);
2004 
2005                 /* get new right page */
2006                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2007                 if (rc)
2008                         return rc;
2009 
2010                 BT_MARK_DIRTY(mp, ip);
2011                 if (!test_cflag(COMMIT_Nolink, ip)) {
2012                         tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2013                         xtlck = (struct xtlock *) & tlck->lock;
2014                 }
2015 
2016                 index0 = index = newindex;
2017         } else
2018                 index++;
2019 
2020         newindex = index + 1;
2021         nextindex = le16_to_cpu(p->header.nextindex);
2022         xlen = xlen - (nxoff - xoff);
2023         xoff = nxoff;
2024         xaddr = nxaddr;
2025 
2026         /* recompute split pages */
2027         if (nextindex == le16_to_cpu(p->header.maxentry)) {
2028                 XT_PUTPAGE(mp);
2029 
2030                 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
2031                         return rc;
2032 
2033                 /* retrieve search result */
2034                 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
2035 
2036                 if (cmp != 0) {
2037                         XT_PUTPAGE(mp);
2038                         jfs_error(ip->i_sb, "xtSearch failed\n");
2039                         return -EIO;
2040                 }
2041 
2042                 if (index0 != index) {
2043                         XT_PUTPAGE(mp);
2044                         jfs_error(ip->i_sb, "unexpected value of index\n");
2045                         return -EIO;
2046                 }
2047         }
2048 
2049         /*
2050          * split XAD into (nXAD, rXAD)
2051          *
2052          *          ---nXAD---|
2053          * --|----------XAD----------|--
2054          *                    |-rXAD-|
2055          */
2056       updateLeft:               /* (nxoff == xoff) && (nxlen < xlen) */
2057         /* update old XAD with nXAD:recorded */
2058         xad = &p->xad[index];
2059         *xad = *nxad;
2060         xad->flag = xflag & ~XAD_NOTRECORDED;
2061 
2062         /* insert rXAD:not_recorded */
2063         xoff = xoff + nxlen;
2064         xlen = xlen - nxlen;
2065         xaddr = xaddr + nxlen;
2066         if (nextindex == le16_to_cpu(p->header.maxentry)) {
2067 /*
2068 printf("xtUpdate.updateLeft.split p:0x%p\n", p);
2069 */
2070                 /* xtSpliUp() unpins leaf pages */
2071                 split.mp = mp;
2072                 split.index = newindex;
2073                 split.flag = xflag;
2074                 split.off = xoff;
2075                 split.len = xlen;
2076                 split.addr = xaddr;
2077                 split.pxdlist = NULL;
2078                 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
2079                         return rc;
2080 
2081                 /* get back old page */
2082                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2083                 if (rc)
2084                         return rc;
2085 
2086                 /*
2087                  * if leaf root has been split, original root has been
2088                  * copied to new child page, i.e., original entry now
2089                  * resides on the new child page;
2090                  */
2091                 if (p->header.flag & BT_INTERNAL) {
2092                         ASSERT(p->header.nextindex ==
2093                                cpu_to_le16(XTENTRYSTART + 1));
2094                         xad = &p->xad[XTENTRYSTART];
2095                         bn = addressXAD(xad);
2096                         XT_PUTPAGE(mp);
2097 
2098                         /* get new child page */
2099                         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2100                         if (rc)
2101                                 return rc;
2102 
2103                         BT_MARK_DIRTY(mp, ip);
2104                         if (!test_cflag(COMMIT_Nolink, ip)) {
2105                                 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
2106                                 xtlck = (struct xtlock *) & tlck->lock;
2107                         }
2108                 }
2109         } else {
2110                 /* if insert into middle, shift right remaining entries */
2111                 if (newindex < nextindex)
2112                         memmove(&p->xad[newindex + 1], &p->xad[newindex],
2113                                 (nextindex - newindex) << L2XTSLOTSIZE);
2114 
2115                 /* insert the entry */
2116                 xad = &p->xad[newindex];
2117                 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2118 
2119                 /* advance next available entry index. */
2120                 p->header.nextindex =
2121                     cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
2122         }
2123 
2124       out:
2125         if (!test_cflag(COMMIT_Nolink, ip)) {
2126                 xtlck->lwm.offset = (xtlck->lwm.offset) ?
2127                     min(index0, (int)xtlck->lwm.offset) : index0;
2128                 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2129                     xtlck->lwm.offset;
2130         }
2131 
2132         /* unpin the leaf page */
2133         XT_PUTPAGE(mp);
2134 
2135         return rc;
2136 }
2137 
2138 
2139 /*
2140  *      xtAppend()
2141  *
2142  * function: grow in append mode from contiguous region specified ;
2143  *
2144  * parameter:
2145  *      tid             - transaction id;
2146  *      ip              - file object;
2147  *      xflag           - extent flag:
2148  *      xoff            - extent offset;
2149  *      maxblocks       - max extent length;
2150  *      xlen            - extent length (in/out);
2151  *      xaddrp          - extent address pointer (in/out):
2152  *      flag            -
2153  *
2154  * return:
2155  */
2156 int xtAppend(tid_t tid,         /* transaction id */
2157              struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
2158              s32 * xlenp,       /* (in/out) */
2159              s64 * xaddrp,      /* (in/out) */
2160              int flag)
2161 {
2162         int rc = 0;
2163         struct metapage *mp;    /* meta-page buffer */
2164         xtpage_t *p;            /* base B+-tree index page */
2165         s64 bn, xaddr;
2166         int index, nextindex;
2167         struct btstack btstack; /* traverse stack */
2168         struct xtsplit split;   /* split information */
2169         xad_t *xad;
2170         int cmp;
2171         struct tlock *tlck;
2172         struct xtlock *xtlck;
2173         int nsplit, nblocks, xlen;
2174         struct pxdlist pxdlist;
2175         pxd_t *pxd;
2176         s64 next;
2177 
2178         xaddr = *xaddrp;
2179         xlen = *xlenp;
2180         jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
2181                  (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
2182 
2183         /*
2184          *      search for the entry location at which to insert:
2185          *
2186          * xtFastSearch() and xtSearch() both returns (leaf page
2187          * pinned, index at which to insert).
2188          * n.b. xtSearch() may return index of maxentry of
2189          * the full page.
2190          */
2191         if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
2192                 return rc;
2193 
2194         /* retrieve search result */
2195         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2196 
2197         if (cmp == 0) {
2198                 rc = -EEXIST;
2199                 goto out;
2200         }
2201 
2202         if (next)
2203                 xlen = min(xlen, (int)(next - xoff));
2204 //insert:
2205         /*
2206          *      insert entry for new extent
2207          */
2208         xflag |= XAD_NEW;
2209 
2210         /*
2211          *      if the leaf page is full, split the page and
2212          *      propagate up the router entry for the new page from split
2213          *
2214          * The xtSplitUp() will insert the entry and unpin the leaf page.
2215          */
2216         nextindex = le16_to_cpu(p->header.nextindex);
2217         if (nextindex < le16_to_cpu(p->header.maxentry))
2218                 goto insertLeaf;
2219 
2220         /*
2221          * allocate new index blocks to cover index page split(s)
2222          */
2223         nsplit = btstack.nsplit;
2224         split.pxdlist = &pxdlist;
2225         pxdlist.maxnpxd = pxdlist.npxd = 0;
2226         pxd = &pxdlist.pxd[0];
2227         nblocks = JFS_SBI(ip->i_sb)->nbperpage;
2228         for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
2229                 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
2230                         PXDaddress(pxd, xaddr);
2231                         PXDlength(pxd, nblocks);
2232 
2233                         pxdlist.maxnpxd++;
2234 
2235                         continue;
2236                 }
2237 
2238                 /* undo allocation */
2239 
2240                 goto out;
2241         }
2242 
2243         xlen = min(xlen, maxblocks);
2244 
2245         /*
2246          * allocate data extent requested
2247          */
2248         if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2249                 goto out;
2250 
2251         split.mp = mp;
2252         split.index = index;
2253         split.flag = xflag;
2254         split.off = xoff;
2255         split.len = xlen;
2256         split.addr = xaddr;
2257         if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
2258                 /* undo data extent allocation */
2259                 dbFree(ip, *xaddrp, (s64) * xlenp);
2260 
2261                 return rc;
2262         }
2263 
2264         *xaddrp = xaddr;
2265         *xlenp = xlen;
2266         return 0;
2267 
2268         /*
2269          *      insert the new entry into the leaf page
2270          */
2271       insertLeaf:
2272         /*
2273          * allocate data extent requested
2274          */
2275         if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
2276                 goto out;
2277 
2278         BT_MARK_DIRTY(mp, ip);
2279         /*
2280          * acquire a transaction lock on the leaf page;
2281          *
2282          * action: xad insertion/extension;
2283          */
2284         tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
2285         xtlck = (struct xtlock *) & tlck->lock;
2286 
2287         /* insert the new entry: mark the entry NEW */
2288         xad = &p->xad[index];
2289         XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
2290 
2291         /* advance next available entry index */
2292         le16_add_cpu(&p->header.nextindex, 1);
2293 
2294         xtlck->lwm.offset =
2295             (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
2296         xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
2297             xtlck->lwm.offset;
2298 
2299         *xaddrp = xaddr;
2300         *xlenp = xlen;
2301 
2302       out:
2303         /* unpin the leaf page */
2304         XT_PUTPAGE(mp);
2305 
2306         return rc;
2307 }
2308 #ifdef _STILL_TO_PORT
2309 
2310 /* - TBD for defragmentaion/reorganization -
2311  *
2312  *      xtDelete()
2313  *
2314  * function:
2315  *      delete the entry with the specified key.
2316  *
2317  *      N.B.: whole extent of the entry is assumed to be deleted.
2318  *
2319  * parameter:
2320  *
2321  * return:
2322  *      ENOENT: if the entry is not found.
2323  *
2324  * exception:
2325  */
2326 int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
2327 {
2328         int rc = 0;
2329         struct btstack btstack;
2330         int cmp;
2331         s64 bn;
2332         struct metapage *mp;
2333         xtpage_t *p;
2334         int index, nextindex;
2335         struct tlock *tlck;
2336         struct xtlock *xtlck;
2337 
2338         /*
2339          * find the matching entry; xtSearch() pins the page
2340          */
2341         if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
2342                 return rc;
2343 
2344         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2345         if (cmp) {
2346                 /* unpin the leaf page */
2347                 XT_PUTPAGE(mp);
2348                 return -ENOENT;
2349         }
2350 
2351         /*
2352          * delete the entry from the leaf page
2353          */
2354         nextindex = le16_to_cpu(p->header.nextindex);
2355         le16_add_cpu(&p->header.nextindex, -1);
2356 
2357         /*
2358          * if the leaf page bocome empty, free the page
2359          */
2360         if (p->header.nextindex == cpu_to_le16(XTENTRYSTART))
2361                 return (xtDeleteUp(tid, ip, mp, p, &btstack));
2362 
2363         BT_MARK_DIRTY(mp, ip);
2364         /*
2365          * acquire a transaction lock on the leaf page;
2366          *
2367          * action:xad deletion;
2368          */
2369         tlck = txLock(tid, ip, mp, tlckXTREE);
2370         xtlck = (struct xtlock *) & tlck->lock;
2371         xtlck->lwm.offset =
2372             (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;
2373 
2374         /* if delete from middle, shift left/compact the remaining entries */
2375         if (index < nextindex - 1)
2376                 memmove(&p->xad[index], &p->xad[index + 1],
2377                         (nextindex - index - 1) * sizeof(xad_t));
2378 
2379         XT_PUTPAGE(mp);
2380 
2381         return 0;
2382 }
2383 
2384 
2385 /* - TBD for defragmentaion/reorganization -
2386  *
2387  *      xtDeleteUp()
2388  *
2389  * function:
2390  *      free empty pages as propagating deletion up the tree
2391  *
2392  * parameter:
2393  *
2394  * return:
2395  */
2396 static int
2397 xtDeleteUp(tid_t tid, struct inode *ip,
2398            struct metapage * fmp, xtpage_t * fp, struct btstack * btstack)
2399 {
2400         int rc = 0;
2401         struct metapage *mp;
2402         xtpage_t *p;
2403         int index, nextindex;
2404         s64 xaddr;
2405         int xlen;
2406         struct btframe *parent;
2407         struct tlock *tlck;
2408         struct xtlock *xtlck;
2409 
2410         /*
2411          * keep root leaf page which has become empty
2412          */
2413         if (fp->header.flag & BT_ROOT) {
2414                 /* keep the root page */
2415                 fp->header.flag &= ~BT_INTERNAL;
2416                 fp->header.flag |= BT_LEAF;
2417                 fp->header.nextindex = cpu_to_le16(XTENTRYSTART);
2418 
2419                 /* XT_PUTPAGE(fmp); */
2420 
2421                 return 0;
2422         }
2423 
2424         /*
2425          * free non-root leaf page
2426          */
2427         if ((rc = xtRelink(tid, ip, fp))) {
2428                 XT_PUTPAGE(fmp);
2429                 return rc;
2430         }
2431 
2432         xaddr = addressPXD(&fp->header.self);
2433         xlen = lengthPXD(&fp->header.self);
2434         /* free the page extent */
2435         dbFree(ip, xaddr, (s64) xlen);
2436 
2437         /* free the buffer page */
2438         discard_metapage(fmp);
2439 
2440         /*
2441          * propagate page deletion up the index tree
2442          *
2443          * If the delete from the parent page makes it empty,
2444          * continue all the way up the tree.
2445          * stop if the root page is reached (which is never deleted) or
2446          * if the entry deletion does not empty the page.
2447          */
2448         while ((parent = BT_POP(btstack)) != NULL) {
2449                 /* get/pin the parent page <sp> */
2450                 XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2451                 if (rc)
2452                         return rc;
2453 
2454                 index = parent->index;
2455 
2456                 /* delete the entry for the freed child page from parent.
2457                  */
2458                 nextindex = le16_to_cpu(p->header.nextindex);
2459 
2460                 /*
2461                  * the parent has the single entry being deleted:
2462                  * free the parent page which has become empty.
2463                  */
2464                 if (nextindex == 1) {
2465                         if (p->header.flag & BT_ROOT) {
2466                                 /* keep the root page */
2467                                 p->header.flag &= ~BT_INTERNAL;
2468                                 p->header.flag |= BT_LEAF;
2469                                 p->header.nextindex =
2470                                     cpu_to_le16(XTENTRYSTART);
2471 
2472                                 /* XT_PUTPAGE(mp); */
2473 
2474                                 break;
2475                         } else {
2476                                 /* free the parent page */
2477                                 if ((rc = xtRelink(tid, ip, p)))
2478                                         return rc;
2479 
2480                                 xaddr = addressPXD(&p->header.self);
2481                                 /* free the page extent */
2482                                 dbFree(ip, xaddr,
2483                                        (s64) JFS_SBI(ip->i_sb)->nbperpage);
2484 
2485                                 /* unpin/free the buffer page */
2486                                 discard_metapage(mp);
2487 
2488                                 /* propagate up */
2489                                 continue;
2490                         }
2491                 }
2492                 /*
2493                  * the parent has other entries remaining:
2494                  * delete the router entry from the parent page.
2495                  */
2496                 else {
2497                         BT_MARK_DIRTY(mp, ip);
2498                         /*
2499                          * acquire a transaction lock on the leaf page;
2500                          *
2501                          * action:xad deletion;
2502                          */
2503                         tlck = txLock(tid, ip, mp, tlckXTREE);
2504                         xtlck = (struct xtlock *) & tlck->lock;
2505                         xtlck->lwm.offset =
2506                             (xtlck->lwm.offset) ? min(index,
2507                                                       xtlck->lwm.
2508                                                       offset) : index;
2509 
2510                         /* if delete from middle,
2511                          * shift left/compact the remaining entries in the page
2512                          */
2513                         if (index < nextindex - 1)
2514                                 memmove(&p->xad[index], &p->xad[index + 1],
2515                                         (nextindex - index -
2516                                          1) << L2XTSLOTSIZE);
2517 
2518                         le16_add_cpu(&p->header.nextindex, -1);
2519                         jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
2520                                  (ulong) parent->bn, index);
2521                 }
2522 
2523                 /* unpin the parent page */
2524                 XT_PUTPAGE(mp);
2525 
2526                 /* exit propagation up */
2527                 break;
2528         }
2529 
2530         return 0;
2531 }
2532 
2533 
2534 /*
2535  * NAME:        xtRelocate()
2536  *
2537  * FUNCTION:    relocate xtpage or data extent of regular file;
2538  *              This function is mainly used by defragfs utility.
2539  *
2540  * NOTE:        This routine does not have the logic to handle
2541  *              uncommitted allocated extent. The caller should call
2542  *              txCommit() to commit all the allocation before call
2543  *              this routine.
2544  */
2545 int
2546 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,  /* old XAD */
2547            s64 nxaddr,          /* new xaddr */
2548            int xtype)
2549 {                               /* extent type: XTPAGE or DATAEXT */
2550         int rc = 0;
2551         struct tblock *tblk;
2552         struct tlock *tlck;
2553         struct xtlock *xtlck;
2554         struct metapage *mp, *pmp, *lmp, *rmp;  /* meta-page buffer */
2555         xtpage_t *p, *pp, *rp, *lp;     /* base B+-tree index page */
2556         xad_t *xad;
2557         pxd_t *pxd;
2558         s64 xoff, xsize;
2559         int xlen;
2560         s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn;
2561         cbuf_t *cp;
2562         s64 offset, nbytes, nbrd, pno;
2563         int nb, npages, nblks;
2564         s64 bn;
2565         int cmp;
2566         int index;
2567         struct pxd_lock *pxdlock;
2568         struct btstack btstack; /* traverse stack */
2569 
2570         xtype = xtype & EXTENT_TYPE;
2571 
2572         xoff = offsetXAD(oxad);
2573         oxaddr = addressXAD(oxad);
2574         xlen = lengthXAD(oxad);
2575 
2576         /* validate extent offset */
2577         offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2578         if (offset >= ip->i_size)
2579                 return -ESTALE; /* stale extent */
2580 
2581         jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
2582                  xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
2583 
2584         /*
2585          *      1. get and validate the parent xtpage/xad entry
2586          *      covering the source extent to be relocated;
2587          */
2588         if (xtype == DATAEXT) {
2589                 /* search in leaf entry */
2590                 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
2591                 if (rc)
2592                         return rc;
2593 
2594                 /* retrieve search result */
2595                 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2596 
2597                 if (cmp) {
2598                         XT_PUTPAGE(pmp);
2599                         return -ESTALE;
2600                 }
2601 
2602                 /* validate for exact match with a single entry */
2603                 xad = &pp->xad[index];
2604                 if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) {
2605                         XT_PUTPAGE(pmp);
2606                         return -ESTALE;
2607                 }
2608         } else {                /* (xtype == XTPAGE) */
2609 
2610                 /* search in internal entry */
2611                 rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
2612                 if (rc)
2613                         return rc;
2614 
2615                 /* retrieve search result */
2616                 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2617 
2618                 if (cmp) {
2619                         XT_PUTPAGE(pmp);
2620                         return -ESTALE;
2621                 }
2622 
2623                 /* xtSearchNode() validated for exact match with a single entry
2624                  */
2625                 xad = &pp->xad[index];
2626         }
2627         jfs_info("xtRelocate: parent xad entry validated.");
2628 
2629         /*
2630          *      2. relocate the extent
2631          */
2632         if (xtype == DATAEXT) {
2633                 /* if the extent is allocated-but-not-recorded
2634                  * there is no real data to be moved in this extent,
2635                  */
2636                 if (xad->flag & XAD_NOTRECORDED)
2637                         goto out;
2638                 else
2639                         /* release xtpage for cmRead()/xtLookup() */
2640                         XT_PUTPAGE(pmp);
2641 
2642                 /*
2643                  *      cmRelocate()
2644                  *
2645                  * copy target data pages to be relocated;
2646                  *
2647                  * data extent must start at page boundary and
2648                  * multiple of page size (except the last data extent);
2649                  * read in each page of the source data extent into cbuf,
2650                  * update the cbuf extent descriptor of the page to be
2651                  * homeward bound to new dst data extent
2652                  * copy the data from the old extent to new extent.
2653                  * copy is essential for compressed files to avoid problems
2654                  * that can arise if there was a change in compression
2655                  * algorithms.
2656                  * it is a good strategy because it may disrupt cache
2657                  * policy to keep the pages in memory afterwards.
2658                  */
2659                 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
2660                 assert((offset & CM_OFFSET) == 0);
2661                 nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2662                 pno = offset >> CM_L2BSIZE;
2663                 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
2664 /*
2665                 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
2666                           (offset >> CM_L2BSIZE) + 1;
2667 */
2668                 sxaddr = oxaddr;
2669                 dxaddr = nxaddr;
2670 
2671                 /* process the request one cache buffer at a time */
2672                 for (nbrd = 0; nbrd < nbytes; nbrd += nb,
2673                      offset += nb, pno++, npages--) {
2674                         /* compute page size */
2675                         nb = min(nbytes - nbrd, CM_BSIZE);
2676 
2677                         /* get the cache buffer of the page */
2678                         if (rc = cmRead(ip, offset, npages, &cp))
2679                                 break;
2680 
2681                         assert(addressPXD(&cp->cm_pxd) == sxaddr);
2682                         assert(!cp->cm_modified);
2683 
2684                         /* bind buffer with the new extent address */
2685                         nblks = nb >> JFS_IP(ip->i_sb)->l2bsize;
2686                         cmSetXD(ip, cp, pno, dxaddr, nblks);
2687 
2688                         /* release the cbuf, mark it as modified */
2689                         cmPut(cp, true);
2690 
2691                         dxaddr += nblks;
2692                         sxaddr += nblks;
2693                 }
2694 
2695                 /* get back parent page */
2696                 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
2697                         return rc;
2698 
2699                 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2700                 jfs_info("xtRelocate: target data extent relocated.");
2701         } else {                /* (xtype == XTPAGE) */
2702 
2703                 /*
2704                  * read in the target xtpage from the source extent;
2705                  */
2706                 XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2707                 if (rc) {
2708                         XT_PUTPAGE(pmp);
2709                         return rc;
2710                 }
2711 
2712                 /*
2713                  * read in sibling pages if any to update sibling pointers;
2714                  */
2715                 rmp = NULL;
2716                 if (p->header.next) {
2717                         nextbn = le64_to_cpu(p->header.next);
2718                         XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
2719                         if (rc) {
2720                                 XT_PUTPAGE(pmp);
2721                                 XT_PUTPAGE(mp);
2722                                 return (rc);
2723                         }
2724                 }
2725 
2726                 lmp = NULL;
2727                 if (p->header.prev) {
2728                         prevbn = le64_to_cpu(p->header.prev);
2729                         XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
2730                         if (rc) {
2731                                 XT_PUTPAGE(pmp);
2732                                 XT_PUTPAGE(mp);
2733                                 if (rmp)
2734                                         XT_PUTPAGE(rmp);
2735                                 return (rc);
2736                         }
2737                 }
2738 
2739                 /* at this point, all xtpages to be updated are in memory */
2740 
2741                 /*
2742                  * update sibling pointers of sibling xtpages if any;
2743                  */
2744                 if (lmp) {
2745                         BT_MARK_DIRTY(lmp, ip);
2746                         tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
2747                         lp->header.next = cpu_to_le64(nxaddr);
2748                         XT_PUTPAGE(lmp);
2749                 }
2750 
2751                 if (rmp) {
2752                         BT_MARK_DIRTY(rmp, ip);
2753                         tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
2754                         rp->header.prev = cpu_to_le64(nxaddr);
2755                         XT_PUTPAGE(rmp);
2756                 }
2757 
2758                 /*
2759                  * update the target xtpage to be relocated
2760                  *
2761                  * update the self address of the target page
2762                  * and write to destination extent;
2763                  * redo image covers the whole xtpage since it is new page
2764                  * to the destination extent;
2765                  * update of bmap for the free of source extent
2766                  * of the target xtpage itself:
2767                  * update of bmap for the allocation of destination extent
2768                  * of the target xtpage itself:
2769                  * update of bmap for the extents covered by xad entries in
2770                  * the target xtpage is not necessary since they are not
2771                  * updated;
2772                  * if not committed before this relocation,
2773                  * target page may contain XAD_NEW entries which must
2774                  * be scanned for bmap update (logredo() always
2775                  * scan xtpage REDOPAGE image for bmap update);
2776                  * if committed before this relocation (tlckRELOCATE),
2777                  * scan may be skipped by commit() and logredo();
2778                  */
2779                 BT_MARK_DIRTY(mp, ip);
2780                 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
2781                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
2782                 xtlck = (struct xtlock *) & tlck->lock;
2783 
2784                 /* update the self address in the xtpage header */
2785                 pxd = &p->header.self;
2786                 PXDaddress(pxd, nxaddr);
2787 
2788                 /* linelock for the after image of the whole page */
2789                 xtlck->lwm.length =
2790                     le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
2791 
2792                 /* update the buffer extent descriptor of target xtpage */
2793                 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2794                 bmSetXD(mp, nxaddr, xsize);
2795 
2796                 /* unpin the target page to new homeward bound */
2797                 XT_PUTPAGE(mp);
2798                 jfs_info("xtRelocate: target xtpage relocated.");
2799         }
2800 
2801         /*
2802          *      3. acquire maplock for the source extent to be freed;
2803          *
2804          * acquire a maplock saving the src relocated extent address;
2805          * to free of the extent at commit time;
2806          */
2807       out:
2808         /* if DATAEXT relocation, write a LOG_UPDATEMAP record for
2809          * free PXD of the source data extent (logredo() will update
2810          * bmap for free of source data extent), and update bmap for
2811          * free of the source data extent;
2812          */
2813         if (xtype == DATAEXT)
2814                 tlck = txMaplock(tid, ip, tlckMAP);
2815         /* if XTPAGE relocation, write a LOG_NOREDOPAGE record
2816          * for the source xtpage (logredo() will init NoRedoPage
2817          * filter and will also update bmap for free of the source
2818          * xtpage), and update bmap for free of the source xtpage;
2819          * N.B. We use tlckMAP instead of tlkcXTREE because there
2820          *      is no buffer associated with this lock since the buffer
2821          *      has been redirected to the target location.
2822          */
2823         else                    /* (xtype == XTPAGE) */
2824                 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
2825 
2826         pxdlock = (struct pxd_lock *) & tlck->lock;
2827         pxdlock->flag = mlckFREEPXD;
2828         PXDaddress(&pxdlock->pxd, oxaddr);
2829         PXDlength(&pxdlock->pxd, xlen);
2830         pxdlock->index = 1;
2831 
2832         /*
2833          *      4. update the parent xad entry for relocation;
2834          *
2835          * acquire tlck for the parent entry with XAD_NEW as entry
2836          * update which will write LOG_REDOPAGE and update bmap for
2837          * allocation of XAD_NEW destination extent;
2838          */
2839         jfs_info("xtRelocate: update parent xad entry.");
2840         BT_MARK_DIRTY(pmp, ip);
2841         tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW);
2842         xtlck = (struct xtlock *) & tlck->lock;
2843 
2844         /* update the XAD with the new destination extent; */
2845         xad = &pp->xad[index];
2846         xad->flag |= XAD_NEW;
2847         XADaddress(xad, nxaddr);
2848 
2849         xtlck->lwm.offset = min(index, xtlck->lwm.offset);
2850         xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) -
2851             xtlck->lwm.offset;
2852 
2853         /* unpin the parent xtpage */
2854         XT_PUTPAGE(pmp);
2855 
2856         return rc;
2857 }
2858 
2859 
2860 /*
2861  *      xtSearchNode()
2862  *
2863  * function:    search for the internal xad entry covering specified extent.
2864  *              This function is mainly used by defragfs utility.
2865  *
2866  * parameters:
2867  *      ip      - file object;
2868  *      xad     - extent to find;
2869  *      cmpp    - comparison result:
2870  *      btstack - traverse stack;
2871  *      flag    - search process flag;
2872  *
2873  * returns:
2874  *      btstack contains (bn, index) of search path traversed to the entry.
2875  *      *cmpp is set to result of comparison with the entry returned.
2876  *      the page containing the entry is pinned at exit.
2877  */
2878 static int xtSearchNode(struct inode *ip, xad_t * xad,  /* required XAD entry */
2879                         int *cmpp, struct btstack * btstack, int flag)
2880 {
2881         int rc = 0;
2882         s64 xoff, xaddr;
2883         int xlen;
2884         int cmp = 1;            /* init for empty page */
2885         s64 bn;                 /* block number */
2886         struct metapage *mp;    /* meta-page buffer */
2887         xtpage_t *p;            /* page */
2888         int base, index, lim;
2889         struct btframe *btsp;
2890         s64 t64;
2891 
2892         BT_CLR(btstack);
2893 
2894         xoff = offsetXAD(xad);
2895         xlen = lengthXAD(xad);
2896         xaddr = addressXAD(xad);
2897 
2898         /*
2899          *      search down tree from root:
2900          *
2901          * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
2902          * internal page, child page Pi contains entry with k, Ki <= K < Kj.
2903          *
2904          * if entry with search key K is not found
2905          * internal page search find the entry with largest key Ki
2906          * less than K which point to the child page to search;
2907          * leaf page search find the entry with smallest key Kj
2908          * greater than K so that the returned index is the position of
2909          * the entry to be shifted right for insertion of new entry.
2910          * for empty tree, search key is greater than any key of the tree.
2911          *
2912          * by convention, root bn = 0.
2913          */
2914         for (bn = 0;;) {
2915                 /* get/pin the page to search */
2916                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2917                 if (rc)
2918                         return rc;
2919                 if (p->header.flag & BT_LEAF) {
2920                         XT_PUTPAGE(mp);
2921                         return -ESTALE;
2922                 }
2923 
2924                 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
2925 
2926                 /*
2927                  * binary search with search key K on the current page
2928                  */
2929                 for (base = XTENTRYSTART; lim; lim >>= 1) {
2930                         index = base + (lim >> 1);
2931 
2932                         XT_CMP(cmp, xoff, &p->xad[index], t64);
2933                         if (cmp == 0) {
2934                                 /*
2935                                  *      search hit
2936                                  *
2937                                  * verify for exact match;
2938                                  */
2939                                 if (xaddr == addressXAD(&p->xad[index]) &&
2940                                     xoff == offsetXAD(&p->xad[index])) {
2941                                         *cmpp = cmp;
2942 
2943                                         /* save search result */
2944                                         btsp = btstack->top;
2945                                         btsp->bn = bn;
2946                                         btsp->index = index;
2947                                         btsp->mp = mp;
2948 
2949                                         return 0;
2950                                 }
2951 
2952                                 /* descend/search its child page */
2953                                 goto next;
2954                         }
2955 
2956                         if (cmp > 0) {
2957                                 base = index + 1;
2958                                 --lim;
2959                         }
2960                 }
2961 
2962                 /*
2963                  *      search miss - non-leaf page:
2964                  *
2965                  * base is the smallest index with key (Kj) greater than
2966                  * search key (K) and may be zero or maxentry index.
2967                  * if base is non-zero, decrement base by one to get the parent
2968                  * entry of the child page to search.
2969                  */
2970                 index = base ? base - 1 : base;
2971 
2972                 /*
2973                  * go down to child page
2974                  */
2975               next:
2976                 /* get the child page block number */
2977                 bn = addressXAD(&p->xad[index]);
2978 
2979                 /* unpin the parent page */
2980                 XT_PUTPAGE(mp);
2981         }
2982 }
2983 
2984 
2985 /*
2986  *      xtRelink()
2987  *
2988  * function:
2989  *      link around a freed page.
2990  *
2991  * Parameter:
2992  *      int             tid,
2993  *      struct inode    *ip,
2994  *      xtpage_t        *p)
2995  *
2996  * returns:
2997  */
2998 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
2999 {
3000         int rc = 0;
3001         struct metapage *mp;
3002         s64 nextbn, prevbn;
3003         struct tlock *tlck;
3004 
3005         nextbn = le64_to_cpu(p->header.next);
3006         prevbn = le64_to_cpu(p->header.prev);
3007 
3008         /* update prev pointer of the next page */
3009         if (nextbn != 0) {
3010                 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
3011                 if (rc)
3012                         return rc;
3013 
3014                 /*
3015                  * acquire a transaction lock on the page;
3016                  *
3017                  * action: update prev pointer;
3018                  */
3019                 BT_MARK_DIRTY(mp, ip);
3020                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3021 
3022                 /* the page may already have been tlock'd */
3023 
3024                 p->header.prev = cpu_to_le64(prevbn);
3025 
3026                 XT_PUTPAGE(mp);
3027         }
3028 
3029         /* update next pointer of the previous page */
3030         if (prevbn != 0) {
3031                 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
3032                 if (rc)
3033                         return rc;
3034 
3035                 /*
3036                  * acquire a transaction lock on the page;
3037                  *
3038                  * action: update next pointer;
3039                  */
3040                 BT_MARK_DIRTY(mp, ip);
3041                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
3042 
3043                 /* the page may already have been tlock'd */
3044 
3045                 p->header.next = le64_to_cpu(nextbn);
3046 
3047                 XT_PUTPAGE(mp);
3048         }
3049 
3050         return 0;
3051 }
3052 #endif                          /*  _STILL_TO_PORT */
3053 
3054 
3055 /*
3056  *      xtInitRoot()
3057  *
3058  * initialize file root (inline in inode)
3059  */
3060 void xtInitRoot(tid_t tid, struct inode *ip)
3061 {
3062         xtpage_t *p;
3063 
3064         /*
3065          * acquire a transaction lock on the root
3066          *
3067          * action:
3068          */
3069         txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
3070                       tlckXTREE | tlckNEW);
3071         p = &JFS_IP(ip)->i_xtroot;
3072 
3073         p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
3074         p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3075 
3076         if (S_ISDIR(ip->i_mode))
3077                 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
3078         else {
3079                 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
3080                 ip->i_size = 0;
3081         }
3082 
3083 
3084         return;
3085 }
3086 
3087 
3088 /*
3089  * We can run into a deadlock truncating a file with a large number of
3090  * xtree pages (large fragmented file).  A robust fix would entail a
3091  * reservation system where we would reserve a number of metadata pages
3092  * and tlocks which we would be guaranteed without a deadlock.  Without
3093  * this, a partial fix is to limit number of metadata pages we will lock
3094  * in a single transaction.  Currently we will truncate the file so that
3095  * no more than 50 leaf pages will be locked.  The caller of xtTruncate
3096  * will be responsible for ensuring that the current transaction gets
3097  * committed, and that subsequent transactions are created to truncate
3098  * the file further if needed.
3099  */
3100 #define MAX_TRUNCATE_LEAVES 50
3101 
3102 /*
3103  *      xtTruncate()
3104  *
3105  * function:
3106  *      traverse for truncation logging backward bottom up;
3107  *      terminate at the last extent entry at the current subtree
3108  *      root page covering new down size.
3109  *      truncation may occur within the last extent entry.
3110  *
3111  * parameter:
3112  *      int             tid,
3113  *      struct inode    *ip,
3114  *      s64             newsize,
3115  *      int             type)   {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
3116  *
3117  * return:
3118  *
3119  * note:
3120  *      PWMAP:
3121  *       1. truncate (non-COMMIT_NOLINK file)
3122  *          by jfs_truncate() or jfs_open(O_TRUNC):
3123  *          xtree is updated;
3124  *       2. truncate index table of directory when last entry removed
3125  *      map update via tlock at commit time;
3126  *      PMAP:
3127  *       Call xtTruncate_pmap instead
3128  *      WMAP:
3129  *       1. remove (free zero link count) on last reference release
3130  *          (pmap has been freed at commit zero link count);
3131  *       2. truncate (COMMIT_NOLINK file, i.e., tmp file):
3132  *          xtree is updated;
3133  *       map update directly at truncation time;
3134  *
3135  *      if (DELETE)
3136  *              no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
3137  *      else if (TRUNCATE)
3138  *              must write LOG_NOREDOPAGE for deleted index page;
3139  *
3140  * pages may already have been tlocked by anonymous transactions
3141  * during file growth (i.e., write) before truncation;
3142  *
3143  * except last truncated entry, deleted entries remains as is
3144  * in the page (nextindex is updated) for other use
3145  * (e.g., log/update allocation map): this avoid copying the page
3146  * info but delay free of pages;
3147  *
3148  */
3149 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3150 {
3151         int rc = 0;
3152         s64 teof;
3153         struct metapage *mp;
3154         xtpage_t *p;
3155         s64 bn;
3156         int index, nextindex;
3157         xad_t *xad;
3158         s64 xoff, xaddr;
3159         int xlen, len, freexlen;
3160         struct btstack btstack;
3161         struct btframe *parent;
3162         struct tblock *tblk = NULL;
3163         struct tlock *tlck = NULL;
3164         struct xtlock *xtlck = NULL;
3165         struct xdlistlock xadlock;      /* maplock for COMMIT_WMAP */
3166         struct pxd_lock *pxdlock;               /* maplock for COMMIT_WMAP */
3167         s64 nfreed;
3168         int freed, log;
3169         int locked_leaves = 0;
3170 
3171         /* save object truncation type */
3172         if (tid) {
3173                 tblk = tid_to_tblock(tid);
3174                 tblk->xflag |= flag;
3175         }
3176 
3177         nfreed = 0;
3178 
3179         flag &= COMMIT_MAP;
3180         assert(flag != COMMIT_PMAP);
3181 
3182         if (flag == COMMIT_PWMAP)
3183                 log = 1;
3184         else {
3185                 log = 0;
3186                 xadlock.flag = mlckFREEXADLIST;
3187                 xadlock.index = 1;
3188         }
3189 
3190         /*
3191          * if the newsize is not an integral number of pages,
3192          * the file between newsize and next page boundary will
3193          * be cleared.
3194          * if truncating into a file hole, it will cause
3195          * a full block to be allocated for the logical block.
3196          */
3197 
3198         /*
3199          * release page blocks of truncated region <teof, eof>
3200          *
3201          * free the data blocks from the leaf index blocks.
3202          * delete the parent index entries corresponding to
3203          * the freed child data/index blocks.
3204          * free the index blocks themselves which aren't needed
3205          * in new sized file.
3206          *
3207          * index blocks are updated only if the blocks are to be
3208          * retained in the new sized file.
3209          * if type is PMAP, the data and index pages are NOT
3210          * freed, and the data and index blocks are NOT freed
3211          * from working map.
3212          * (this will allow continued access of data/index of
3213          * temporary file (zerolink count file truncated to zero-length)).
3214          */
3215         teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
3216             JFS_SBI(ip->i_sb)->l2bsize;
3217 
3218         /* clear stack */
3219         BT_CLR(&btstack);
3220 
3221         /*
3222          * start with root
3223          *
3224          * root resides in the inode
3225          */
3226         bn = 0;
3227 
3228         /*
3229          * first access of each page:
3230          */
3231       getPage:
3232         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3233         if (rc)
3234                 return rc;
3235 
3236         /* process entries backward from last index */
3237         index = le16_to_cpu(p->header.nextindex) - 1;
3238 
3239 
3240         /* Since this is the rightmost page at this level, and we may have
3241          * already freed a page that was formerly to the right, let's make
3242          * sure that the next pointer is zero.
3243          */
3244         if (p->header.next) {
3245                 if (log)
3246                         /*
3247                          * Make sure this change to the header is logged.
3248                          * If we really truncate this leaf, the flag
3249                          * will be changed to tlckTRUNCATE
3250                          */
3251                         tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
3252                 BT_MARK_DIRTY(mp, ip);
3253                 p->header.next = 0;
3254         }
3255 
3256         if (p->header.flag & BT_INTERNAL)
3257                 goto getChild;
3258 
3259         /*
3260          *      leaf page
3261          */
3262         freed = 0;
3263 
3264         /* does region covered by leaf page precede Teof ? */
3265         xad = &p->xad[index];
3266         xoff = offsetXAD(xad);
3267         xlen = lengthXAD(xad);
3268         if (teof >= xoff + xlen) {
3269                 XT_PUTPAGE(mp);
3270                 goto getParent;
3271         }
3272 
3273         /* (re)acquire tlock of the leaf page */
3274         if (log) {
3275                 if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
3276                         /*
3277                          * We need to limit the size of the transaction
3278                          * to avoid exhausting pagecache & tlocks
3279                          */
3280                         XT_PUTPAGE(mp);
3281                         newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
3282                         goto getParent;
3283                 }
3284                 tlck = txLock(tid, ip, mp, tlckXTREE);
3285                 tlck->type = tlckXTREE | tlckTRUNCATE;
3286                 xtlck = (struct xtlock *) & tlck->lock;
3287                 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
3288         }
3289         BT_MARK_DIRTY(mp, ip);
3290 
3291         /*
3292          * scan backward leaf page entries
3293          */
3294         for (; index >= XTENTRYSTART; index--) {
3295                 xad = &p->xad[index];
3296                 xoff = offsetXAD(xad);
3297                 xlen = lengthXAD(xad);
3298                 xaddr = addressXAD(xad);
3299 
3300                 /*
3301                  * The "data" for a directory is indexed by the block
3302                  * device's address space.  This metadata must be invalidated
3303                  * here
3304                  */
3305                 if (S_ISDIR(ip->i_mode) && (teof == 0))
3306                         invalidate_xad_metapages(ip, *xad);
3307                 /*
3308                  * entry beyond eof: continue scan of current page
3309                  *          xad
3310                  * ---|---=======------->
3311                  *   eof
3312                  */
3313                 if (teof < xoff) {
3314                         nfreed += xlen;
3315                         continue;
3316                 }
3317 
3318                 /*
3319                  * (xoff <= teof): last entry to be deleted from page;
3320                  * If other entries remain in page: keep and update the page.
3321                  */
3322 
3323                 /*
3324                  * eof == entry_start: delete the entry
3325                  *           xad
3326                  * -------|=======------->
3327                  *       eof
3328                  *
3329                  */
3330                 if (teof == xoff) {
3331                         nfreed += xlen;
3332 
3333                         if (index == XTENTRYSTART)
3334                                 break;
3335 
3336                         nextindex = index;
3337                 }
3338                 /*
3339                  * eof within the entry: truncate the entry.
3340                  *          xad
3341                  * -------===|===------->
3342                  *          eof
3343                  */
3344                 else if (teof < xoff + xlen) {
3345                         /* update truncated entry */
3346                         len = teof - xoff;
3347                         freexlen = xlen - len;
3348                         XADlength(xad, len);
3349 
3350                         /* save pxd of truncated extent in tlck */
3351                         xaddr += len;
3352                         if (log) {      /* COMMIT_PWMAP */
3353                                 xtlck->lwm.offset = (xtlck->lwm.offset) ?
3354                                     min(index, (int)xtlck->lwm.offset) : index;
3355                                 xtlck->lwm.length = index + 1 -
3356                                     xtlck->lwm.offset;
3357                                 xtlck->twm.offset = index;
3358                                 pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
3359                                 pxdlock->flag = mlckFREEPXD;
3360                                 PXDaddress(&pxdlock->pxd, xaddr);
3361                                 PXDlength(&pxdlock->pxd, freexlen);
3362                         }
3363                         /* free truncated extent */
3364                         else {  /* COMMIT_WMAP */
3365 
3366                                 pxdlock = (struct pxd_lock *) & xadlock;
3367                                 pxdlock->flag = mlckFREEPXD;
3368                                 PXDaddress(&pxdlock->pxd, xaddr);
3369                                 PXDlength(&pxdlock->pxd, freexlen);
3370                                 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);
3371 
3372                                 /* reset map lock */
3373                                 xadlock.flag = mlckFREEXADLIST;
3374                         }
3375 
3376                         /* current entry is new last entry; */
3377                         nextindex = index + 1;
3378 
3379                         nfreed += freexlen;
3380                 }
3381                 /*
3382                  * eof beyond the entry:
3383                  *          xad
3384                  * -------=======---|--->
3385                  *                 eof
3386                  */
3387                 else {          /* (xoff + xlen < teof) */
3388 
3389                         nextindex = index + 1;
3390                 }
3391 
3392                 if (nextindex < le16_to_cpu(p->header.nextindex)) {
3393                         if (!log) {     /* COMMIT_WAMP */
3394                                 xadlock.xdlist = &p->xad[nextindex];
3395                                 xadlock.count =
3396                                     le16_to_cpu(p->header.nextindex) -
3397                                     nextindex;
3398                                 txFreeMap(ip, (struct maplock *) & xadlock,
3399                                           NULL, COMMIT_WMAP);
3400                         }
3401                         p->header.nextindex = cpu_to_le16(nextindex);
3402                 }
3403 
3404                 XT_PUTPAGE(mp);
3405 
3406                 /* assert(freed == 0); */
3407                 goto getParent;
3408         }                       /* end scan of leaf page entries */
3409 
3410         freed = 1;
3411 
3412         /*
3413          * leaf page become empty: free the page if type != PMAP
3414          */
3415         if (log) {              /* COMMIT_PWMAP */
3416                 /* txCommit() with tlckFREE:
3417                  * free data extents covered by leaf [XTENTRYSTART:hwm);
3418                  * invalidate leaf if COMMIT_PWMAP;
3419                  * if (TRUNCATE), will write LOG_NOREDOPAGE;
3420                  */
3421                 tlck->type = tlckXTREE | tlckFREE;
3422         } else {                /* COMMIT_WAMP */
3423 
3424                 /* free data extents covered by leaf */
3425                 xadlock.xdlist = &p->xad[XTENTRYSTART];
3426                 xadlock.count =
3427                     le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
3428                 txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
3429         }
3430 
3431         if (p->header.flag & BT_ROOT) {
3432                 p->header.flag &= ~BT_INTERNAL;
3433                 p->header.flag |= BT_LEAF;
3434                 p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3435 
3436                 XT_PUTPAGE(mp); /* debug */
3437                 goto out;
3438         } else {
3439                 if (log) {      /* COMMIT_PWMAP */
3440                         /* page will be invalidated at tx completion
3441                          */
3442                         XT_PUTPAGE(mp);
3443                 } else {        /* COMMIT_WMAP */
3444 
3445                         if (mp->lid)
3446                                 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;
3447 
3448                         /* invalidate empty leaf page */
3449                         discard_metapage(mp);
3450                 }
3451         }
3452 
3453         /*
3454          * the leaf page become empty: delete the parent entry
3455          * for the leaf page if the parent page is to be kept
3456          * in the new sized file.
3457          */
3458 
3459         /*
3460          * go back up to the parent page
3461          */
3462       getParent:
3463         /* pop/restore parent entry for the current child page */
3464         if ((parent = BT_POP(&btstack)) == NULL)
3465                 /* current page must have been root */
3466                 goto out;
3467 
3468         /* get back the parent page */
3469         bn = parent->bn;
3470         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3471         if (rc)
3472                 return rc;
3473 
3474         index = parent->index;
3475 
3476         /*
3477          * child page was not empty:
3478          */
3479         if (freed == 0) {
3480                 /* has any entry deleted from parent ? */
3481                 if (index < le16_to_cpu(p->header.nextindex) - 1) {
3482                         /* (re)acquire tlock on the parent page */
3483                         if (log) {      /* COMMIT_PWMAP */
3484                                 /* txCommit() with tlckTRUNCATE:
3485                                  * free child extents covered by parent [);
3486                                  */
3487                                 tlck = txLock(tid, ip, mp, tlckXTREE);
3488                                 xtlck = (struct xtlock *) & tlck->lock;
3489                                 if (!(tlck->type & tlckTRUNCATE)) {
3490                                         xtlck->hwm.offset =
3491                                             le16_to_cpu(p->header.
3492                                                         nextindex) - 1;
3493                                         tlck->type =
3494                                             tlckXTREE | tlckTRUNCATE;
3495                                 }
3496                         } else {        /* COMMIT_WMAP */
3497 
3498                                 /* free child extents covered by parent */
3499                                 xadlock.xdlist = &p->xad[index + 1];
3500                                 xadlock.count =
3501                                     le16_to_cpu(p->header.nextindex) -
3502                                     index - 1;
3503                                 txFreeMap(ip, (struct maplock *) & xadlock,
3504                                           NULL, COMMIT_WMAP);
3505                         }
3506                         BT_MARK_DIRTY(mp, ip);
3507 
3508                         p->header.nextindex = cpu_to_le16(index + 1);
3509                 }
3510                 XT_PUTPAGE(mp);
3511                 goto getParent;
3512         }
3513 
3514         /*
3515          * child page was empty:
3516          */
3517         nfreed += lengthXAD(&p->xad[index]);
3518 
3519         /*
3520          * During working map update, child page's tlock must be handled
3521          * before parent's.  This is because the parent's tlock will cause
3522          * the child's disk space to be marked available in the wmap, so
3523          * it's important that the child page be released by that time.
3524          *
3525          * ToDo:  tlocks should be on doubly-linked list, so we can
3526          * quickly remove it and add it to the end.
3527          */
3528 
3529         /*
3530          * Move parent page's tlock to the end of the tid's tlock list
3531          */
3532         if (log && mp->lid && (tblk->last != mp->lid) &&
3533             lid_to_tlock(mp->lid)->tid) {
3534                 lid_t lid = mp->lid;
3535                 struct tlock *prev;
3536 
3537                 tlck = lid_to_tlock(lid);
3538 
3539                 if (tblk->next == lid)
3540                         tblk->next = tlck->next;
3541                 else {
3542                         for (prev = lid_to_tlock(tblk->next);
3543                              prev->next != lid;
3544                              prev = lid_to_tlock(prev->next)) {
3545                                 assert(prev->next);
3546                         }
3547                         prev->next = tlck->next;
3548                 }
3549                 lid_to_tlock(tblk->last)->next = lid;
3550                 tlck->next = 0;
3551                 tblk->last = lid;
3552         }
3553 
3554         /*
3555          * parent page become empty: free the page
3556          */
3557         if (index == XTENTRYSTART) {
3558                 if (log) {      /* COMMIT_PWMAP */
3559                         /* txCommit() with tlckFREE:
3560                          * free child extents covered by parent;
3561                          * invalidate parent if COMMIT_PWMAP;
3562                          */
3563                         tlck = txLock(tid, ip, mp, tlckXTREE);
3564                         xtlck = (struct xtlock *) & tlck->lock;
3565                         xtlck->hwm.offset =
3566                             le16_to_cpu(p->header.nextindex) - 1;
3567                         tlck->type = tlckXTREE | tlckFREE;
3568                 } else {        /* COMMIT_WMAP */
3569 
3570                         /* free child extents covered by parent */
3571                         xadlock.xdlist = &p->xad[XTENTRYSTART];
3572                         xadlock.count =
3573                             le16_to_cpu(p->header.nextindex) -
3574                             XTENTRYSTART;
3575                         txFreeMap(ip, (struct maplock *) & xadlock, NULL,
3576                                   COMMIT_WMAP);
3577                 }
3578                 BT_MARK_DIRTY(mp, ip);
3579 
3580                 if (p->header.flag & BT_ROOT) {
3581                         p->header.flag &= ~BT_INTERNAL;
3582                         p->header.flag |= BT_LEAF;
3583                         p->header.nextindex = cpu_to_le16(XTENTRYSTART);
3584                         if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
3585                                 /*
3586                                  * Shrink root down to allow inline
3587                                  * EA (otherwise fsck complains)
3588                                  */
3589                                 p->header.maxentry =
3590                                     cpu_to_le16(XTROOTINITSLOT);
3591                                 JFS_IP(ip)->mode2 |= INLINEEA;
3592                         }
3593 
3594                         XT_PUTPAGE(mp); /* debug */
3595                         goto out;
3596                 } else {
3597                         if (log) {      /* COMMIT_PWMAP */
3598                                 /* page will be invalidated at tx completion
3599                                  */
3600                                 XT_PUTPAGE(mp);
3601                         } else {        /* COMMIT_WMAP */
3602 
3603                                 if (mp->lid)
3604                                         lid_to_tlock(mp->lid)->flag |=
3605                                                 tlckFREELOCK;
3606 
3607                                 /* invalidate parent page */
3608                                 discard_metapage(mp);
3609                         }
3610 
3611                         /* parent has become empty and freed:
3612                          * go back up to its parent page
3613                          */
3614                         /* freed = 1; */
3615                         goto getParent;
3616                 }
3617         }
3618         /*
3619          * parent page still has entries for front region;
3620          */
3621         else {
3622                 /* try truncate region covered by preceding entry
3623                  * (process backward)
3624                  */
3625                 index--;
3626 
3627                 /* go back down to the child page corresponding
3628                  * to the entry
3629                  */
3630                 goto getChild;
3631         }
3632 
3633         /*
3634          *      internal page: go down to child page of current entry
3635          */
3636       getChild:
3637         /* save current parent entry for the child page */
3638         if (BT_STACK_FULL(&btstack)) {
3639                 jfs_error(ip->i_sb, "stack overrun!\n");
3640                 XT_PUTPAGE(mp);
3641                 return -EIO;
3642         }
3643         BT_PUSH(&btstack, bn, index);
3644 
3645         /* get child page */
3646         xad = &p->xad[index];
3647         bn = addressXAD(xad);
3648 
3649         /*
3650          * first access of each internal entry:
3651          */
3652         /* release parent page */
3653         XT_PUTPAGE(mp);
3654 
3655         /* process the child page */
3656         goto getPage;
3657 
3658       out:
3659         /*
3660          * update file resource stat
3661          */
3662         /* set size
3663          */
3664         if (S_ISDIR(ip->i_mode) && !newsize)
3665                 ip->i_size = 1; /* fsck hates zero-length directories */
3666         else
3667                 ip->i_size = newsize;
3668 
3669         /* update quota allocation to reflect freed blocks */
3670         dquot_free_block(ip, nfreed);
3671 
3672         /*
3673          * free tlock of invalidated pages
3674          */
3675         if (flag == COMMIT_WMAP)
3676                 txFreelock(ip);
3677 
3678         return newsize;
3679 }
3680 
3681 
3682 /*
3683  *      xtTruncate_pmap()
3684  *
3685  * function:
3686  *      Perform truncate to zero length for deleted file, leaving the
3687  *      the xtree and working map untouched.  This allows the file to
3688  *      be accessed via open file handles, while the delete of the file
3689  *      is committed to disk.
3690  *
3691  * parameter:
3692  *      tid_t           tid,
3693  *      struct inode    *ip,
3694  *      s64             committed_size)
3695  *
3696  * return: new committed size
3697  *
3698  * note:
3699  *
3700  *      To avoid deadlock by holding too many transaction locks, the
3701  *      truncation may be broken up into multiple transactions.
3702  *      The committed_size keeps track of part of the file has been
3703  *      freed from the pmaps.
3704  */
3705 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
3706 {
3707         s64 bn;
3708         struct btstack btstack;
3709         int cmp;
3710         int index;
3711         int locked_leaves = 0;
3712         struct metapage *mp;
3713         xtpage_t *p;
3714         struct btframe *parent;
3715         int rc;
3716         struct tblock *tblk;
3717         struct tlock *tlck = NULL;
3718         xad_t *xad;
3719         int xlen;
3720         s64 xoff;
3721         struct xtlock *xtlck = NULL;
3722 
3723         /* save object truncation type */
3724         tblk = tid_to_tblock(tid);
3725         tblk->xflag |= COMMIT_PMAP;
3726 
3727         /* clear stack */
3728         BT_CLR(&btstack);
3729 
3730         if (committed_size) {
3731                 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
3732                 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
3733                 if (rc)
3734                         return rc;
3735 
3736                 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3737 
3738                 if (cmp != 0) {
3739                         XT_PUTPAGE(mp);
3740                         jfs_error(ip->i_sb, "did not find extent\n");
3741                         return -EIO;
3742                 }
3743         } else {
3744                 /*
3745                  * start with root
3746                  *
3747                  * root resides in the inode
3748                  */
3749                 bn = 0;
3750 
3751                 /*
3752                  * first access of each page:
3753                  */
3754       getPage:
3755                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3756                 if (rc)
3757                         return rc;
3758 
3759                 /* process entries backward from last index */
3760                 index = le16_to_cpu(p->header.nextindex) - 1;
3761 
3762                 if (p->header.flag & BT_INTERNAL)
3763                         goto getChild;
3764         }
3765 
3766         /*
3767          *      leaf page
3768          */
3769 
3770         if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
3771                 /*
3772                  * We need to limit the size of the transaction
3773                  * to avoid exhausting pagecache & tlocks
3774                  */
3775                 xad = &p->xad[index];
3776                 xoff = offsetXAD(xad);
3777                 xlen = lengthXAD(xad);
3778                 XT_PUTPAGE(mp);
3779                 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
3780         }
3781         tlck = txLock(tid, ip, mp, tlckXTREE);
3782         tlck->type = tlckXTREE | tlckFREE;
3783         xtlck = (struct xtlock *) & tlck->lock;
3784         xtlck->hwm.offset = index;
3785 
3786 
3787         XT_PUTPAGE(mp);
3788 
3789         /*
3790          * go back up to the parent page
3791          */
3792       getParent:
3793         /* pop/restore parent entry for the current child page */
3794         if ((parent = BT_POP(&btstack)) == NULL)
3795                 /* current page must have been root */
3796                 goto out;
3797 
3798         /* get back the parent page */
3799         bn = parent->bn;
3800         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3801         if (rc)
3802                 return rc;
3803 
3804         index = parent->index;
3805 
3806         /*
3807          * parent page become empty: free the page
3808          */
3809         if (index == XTENTRYSTART) {
3810                 /* txCommit() with tlckFREE:
3811                  * free child extents covered by parent;
3812                  * invalidate parent if COMMIT_PWMAP;
3813                  */
3814                 tlck = txLock(tid, ip, mp, tlckXTREE);
3815                 xtlck = (struct xtlock *) & tlck->lock;
3816                 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
3817                 tlck->type = tlckXTREE | tlckFREE;
3818 
3819                 XT_PUTPAGE(mp);
3820 
3821                 if (p->header.flag & BT_ROOT) {
3822 
3823                         goto out;
3824                 } else {
3825                         goto getParent;
3826                 }
3827         }
3828         /*
3829          * parent page still has entries for front region;
3830          */
3831         else
3832                 index--;
3833         /*
3834          *      internal page: go down to child page of current entry
3835          */
3836       getChild:
3837         /* save current parent entry for the child page */
3838         if (BT_STACK_FULL(&btstack)) {
3839                 jfs_error(ip->i_sb, "stack overrun!\n");
3840                 XT_PUTPAGE(mp);
3841                 return -EIO;
3842         }
3843         BT_PUSH(&btstack, bn, index);
3844 
3845         /* get child page */
3846         xad = &p->xad[index];
3847         bn = addressXAD(xad);
3848 
3849         /*
3850          * first access of each internal entry:
3851          */
3852         /* release parent page */
3853         XT_PUTPAGE(mp);
3854 
3855         /* process the child page */
3856         goto getPage;
3857 
3858       out:
3859 
3860         return 0;
3861 }
3862 
3863 #ifdef CONFIG_JFS_STATISTICS
3864 int jfs_xtstat_proc_show(struct seq_file *m, void *v)
3865 {
3866         seq_printf(m,
3867                        "JFS Xtree statistics\n"
3868                        "====================\n"
3869                        "searches = %d\n"
3870                        "fast searches = %d\n"
3871                        "splits = %d\n",
3872                        xtStat.search,
3873                        xtStat.fastSearch,
3874                        xtStat.split);
3875         return 0;
3876 }
3877 #endif

/* [<][>][^][v][top][bottom][index][help] */