1/*
2 *   Copyright (C) International Business Machines Corp., 2000-2004
3 *
4 *   This program is free software;  you can redistribute it and/or modify
5 *   it under the terms of the GNU General Public License as published by
6 *   the Free Software Foundation; either version 2 of the License, or
7 *   (at your option) any later version.
8 *
9 *   This program is distributed in the hope that it will be useful,
10 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
11 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12 *   the GNU General Public License for more details.
13 *
14 *   You should have received a copy of the GNU General Public License
15 *   along with this program;  if not, write to the Free Software
16 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19/*
20 *	jfs_dtree.c: directory B+-tree manager
21 *
22 * B+-tree with variable length key directory:
23 *
24 * each directory page is structured as an array of 32-byte
25 * directory entry slots initialized as a freelist
26 * to avoid search/compaction of free space at insertion.
27 * when an entry is inserted, a number of slots are allocated
28 * from the freelist as required to store variable length data
29 * of the entry; when the entry is deleted, slots of the entry
30 * are returned to freelist.
31 *
32 * leaf entry stores full name as key and file serial number
33 * (aka inode number) as data.
34 * internal/router entry stores sufffix compressed name
35 * as key and simple extent descriptor as data.
36 *
37 * each directory page maintains a sorted entry index table
38 * which stores the start slot index of sorted entries
39 * to allow binary search on the table.
40 *
41 * directory starts as a root/leaf page in on-disk inode
42 * inline data area.
43 * when it becomes full, it starts a leaf of a external extent
44 * of length of 1 block. each time the first leaf becomes full,
45 * it is extended rather than split (its size is doubled),
46 * until its length becoms 4 KBytes, from then the extent is split
47 * with new 4 Kbyte extent when it becomes full
48 * to reduce external fragmentation of small directories.
49 *
50 * blah, blah, blah, for linear scan of directory in pieces by
51 * readdir().
52 *
53 *
54 *	case-insensitive directory file system
55 *
56 * names are stored in case-sensitive way in leaf entry.
57 * but stored, searched and compared in case-insensitive (uppercase) order
58 * (i.e., both search key and entry key are folded for search/compare):
59 * (note that case-sensitive order is BROKEN in storage, e.g.,
60 *  sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
61 *
62 *  entries which folds to the same key makes up a equivalent class
63 *  whose members are stored as contiguous cluster (may cross page boundary)
64 *  but whose order is arbitrary and acts as duplicate, e.g.,
65 *  abc, Abc, aBc, abC)
66 *
67 * once match is found at leaf, requires scan forward/backward
68 * either for, in case-insensitive search, duplicate
69 * or for, in case-sensitive search, for exact match
70 *
71 * router entry must be created/stored in case-insensitive way
72 * in internal entry:
73 * (right most key of left page and left most key of right page
74 * are folded, and its suffix compression is propagated as router
75 * key in parent)
76 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB>
77 * should be made the router key for the split)
78 *
79 * case-insensitive search:
80 *
81 *	fold search key;
82 *
83 *	case-insensitive search of B-tree:
84 *	for internal entry, router key is already folded;
85 *	for leaf entry, fold the entry key before comparison.
86 *
87 *	if (leaf entry case-insensitive match found)
88 *		if (next entry satisfies case-insensitive match)
89 *			return EDUPLICATE;
90 *		if (prev entry satisfies case-insensitive match)
91 *			return EDUPLICATE;
92 *		return match;
93 *	else
94 *		return no match;
95 *
96 *	serialization:
97 * target directory inode lock is being held on entry/exit
98 * of all main directory service routines.
99 *
100 *	log based recovery:
101 */
102
103#include <linux/fs.h>
104#include <linux/quotaops.h>
105#include <linux/slab.h>
106#include "jfs_incore.h"
107#include "jfs_superblock.h"
108#include "jfs_filsys.h"
109#include "jfs_metapage.h"
110#include "jfs_dmap.h"
111#include "jfs_unicode.h"
112#include "jfs_debug.h"
113
114/* dtree split parameter */
115struct dtsplit {
116	struct metapage *mp;
117	s16 index;
118	s16 nslot;
119	struct component_name *key;
120	ddata_t *data;
121	struct pxdlist *pxdlist;
122};
123
124#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot)
125
126/* get page buffer for specified block address */
127#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)				\
128do {									\
129	BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot);	\
130	if (!(RC)) {							\
131		if (((P)->header.nextindex >				\
132		     (((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \
133		    ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) {	\
134			BT_PUTPAGE(MP);					\
135			jfs_error((IP)->i_sb,				\
136				  "DT_GETPAGE: dtree page corrupt\n");	\
137			MP = NULL;					\
138			RC = -EIO;					\
139		}							\
140	}								\
141} while (0)
142
143/* for consistency */
144#define DT_PUTPAGE(MP) BT_PUTPAGE(MP)
145
146#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
147	BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot)
148
149/*
150 * forward references
151 */
152static int dtSplitUp(tid_t tid, struct inode *ip,
153		     struct dtsplit * split, struct btstack * btstack);
154
155static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
156		       struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp);
157
158static int dtExtendPage(tid_t tid, struct inode *ip,
159			struct dtsplit * split, struct btstack * btstack);
160
161static int dtSplitRoot(tid_t tid, struct inode *ip,
162		       struct dtsplit * split, struct metapage ** rmpp);
163
164static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
165		      dtpage_t * fp, struct btstack * btstack);
166
167static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p);
168
169static int dtReadFirst(struct inode *ip, struct btstack * btstack);
170
171static int dtReadNext(struct inode *ip,
172		      loff_t * offset, struct btstack * btstack);
173
174static int dtCompare(struct component_name * key, dtpage_t * p, int si);
175
176static int ciCompare(struct component_name * key, dtpage_t * p, int si,
177		     int flag);
178
179static void dtGetKey(dtpage_t * p, int i, struct component_name * key,
180		     int flag);
181
182static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
183			      int ri, struct component_name * key, int flag);
184
185static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
186			  ddata_t * data, struct dt_lock **);
187
188static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
189			struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
190			int do_index);
191
192static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock);
193
194static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock);
195
196static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);
197
198#define ciToUpper(c)	UniStrupr((c)->name)
199
200/*
201 *	read_index_page()
202 *
203 *	Reads a page of a directory's index table.
204 *	Having metadata mapped into the directory inode's address space
205 *	presents a multitude of problems.  We avoid this by mapping to
206 *	the absolute address space outside of the *_metapage routines
207 */
208static struct metapage *read_index_page(struct inode *inode, s64 blkno)
209{
210	int rc;
211	s64 xaddr;
212	int xflag;
213	s32 xlen;
214
215	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
216	if (rc || (xaddr == 0))
217		return NULL;
218
219	return read_metapage(inode, xaddr, PSIZE, 1);
220}
221
222/*
223 *	get_index_page()
224 *
225 *	Same as get_index_page(), but get's a new page without reading
226 */
227static struct metapage *get_index_page(struct inode *inode, s64 blkno)
228{
229	int rc;
230	s64 xaddr;
231	int xflag;
232	s32 xlen;
233
234	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
235	if (rc || (xaddr == 0))
236		return NULL;
237
238	return get_metapage(inode, xaddr, PSIZE, 1);
239}
240
241/*
242 *	find_index()
243 *
244 *	Returns dtree page containing directory table entry for specified
245 *	index and pointer to its entry.
246 *
247 *	mp must be released by caller.
248 */
249static struct dir_table_slot *find_index(struct inode *ip, u32 index,
250					 struct metapage ** mp, s64 *lblock)
251{
252	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
253	s64 blkno;
254	s64 offset;
255	int page_offset;
256	struct dir_table_slot *slot;
257	static int maxWarnings = 10;
258
259	if (index < 2) {
260		if (maxWarnings) {
261			jfs_warn("find_entry called with index = %d", index);
262			maxWarnings--;
263		}
264		return NULL;
265	}
266
267	if (index >= jfs_ip->next_index) {
268		jfs_warn("find_entry called with index >= next_index");
269		return NULL;
270	}
271
272	if (jfs_dirtable_inline(ip)) {
273		/*
274		 * Inline directory table
275		 */
276		*mp = NULL;
277		slot = &jfs_ip->i_dirtable[index - 2];
278	} else {
279		offset = (index - 2) * sizeof(struct dir_table_slot);
280		page_offset = offset & (PSIZE - 1);
281		blkno = ((offset + 1) >> L2PSIZE) <<
282		    JFS_SBI(ip->i_sb)->l2nbperpage;
283
284		if (*mp && (*lblock != blkno)) {
285			release_metapage(*mp);
286			*mp = NULL;
287		}
288		if (!(*mp)) {
289			*lblock = blkno;
290			*mp = read_index_page(ip, blkno);
291		}
292		if (!(*mp)) {
293			jfs_err("free_index: error reading directory table");
294			return NULL;
295		}
296
297		slot =
298		    (struct dir_table_slot *) ((char *) (*mp)->data +
299					       page_offset);
300	}
301	return slot;
302}
303
304static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
305			      u32 index)
306{
307	struct tlock *tlck;
308	struct linelock *llck;
309	struct lv *lv;
310
311	tlck = txLock(tid, ip, mp, tlckDATA);
312	llck = (struct linelock *) tlck->lock;
313
314	if (llck->index >= llck->maxcnt)
315		llck = txLinelock(llck);
316	lv = &llck->lv[llck->index];
317
318	/*
319	 *	Linelock slot size is twice the size of directory table
320	 *	slot size.  512 entries per page.
321	 */
322	lv->offset = ((index - 2) & 511) >> 1;
323	lv->length = 1;
324	llck->index++;
325}
326
327/*
328 *	add_index()
329 *
330 *	Adds an entry to the directory index table.  This is used to provide
331 *	each directory entry with a persistent index in which to resume
332 *	directory traversals
333 */
334static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
335{
336	struct super_block *sb = ip->i_sb;
337	struct jfs_sb_info *sbi = JFS_SBI(sb);
338	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
339	u64 blkno;
340	struct dir_table_slot *dirtab_slot;
341	u32 index;
342	struct linelock *llck;
343	struct lv *lv;
344	struct metapage *mp;
345	s64 offset;
346	uint page_offset;
347	struct tlock *tlck;
348	s64 xaddr;
349
350	ASSERT(DO_INDEX(ip));
351
352	if (jfs_ip->next_index < 2) {
353		jfs_warn("add_index: next_index = %d.  Resetting!",
354			   jfs_ip->next_index);
355		jfs_ip->next_index = 2;
356	}
357
358	index = jfs_ip->next_index++;
359
360	if (index <= MAX_INLINE_DIRTABLE_ENTRY) {
361		/*
362		 * i_size reflects size of index table, or 8 bytes per entry.
363		 */
364		ip->i_size = (loff_t) (index - 1) << 3;
365
366		/*
367		 * dir table fits inline within inode
368		 */
369		dirtab_slot = &jfs_ip->i_dirtable[index-2];
370		dirtab_slot->flag = DIR_INDEX_VALID;
371		dirtab_slot->slot = slot;
372		DTSaddress(dirtab_slot, bn);
373
374		set_cflag(COMMIT_Dirtable, ip);
375
376		return index;
377	}
378	if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
379		struct dir_table_slot temp_table[12];
380
381		/*
382		 * It's time to move the inline table to an external
383		 * page and begin to build the xtree
384		 */
385		if (dquot_alloc_block(ip, sbi->nbperpage))
386			goto clean_up;
387		if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
388			dquot_free_block(ip, sbi->nbperpage);
389			goto clean_up;
390		}
391
392		/*
393		 * Save the table, we're going to overwrite it with the
394		 * xtree root
395		 */
396		memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table));
397
398		/*
399		 * Initialize empty x-tree
400		 */
401		xtInitRoot(tid, ip);
402
403		/*
404		 * Add the first block to the xtree
405		 */
406		if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) {
407			/* This really shouldn't fail */
408			jfs_warn("add_index: xtInsert failed!");
409			memcpy(&jfs_ip->i_dirtable, temp_table,
410			       sizeof (temp_table));
411			dbFree(ip, xaddr, sbi->nbperpage);
412			dquot_free_block(ip, sbi->nbperpage);
413			goto clean_up;
414		}
415		ip->i_size = PSIZE;
416
417		mp = get_index_page(ip, 0);
418		if (!mp) {
419			jfs_err("add_index: get_metapage failed!");
420			xtTruncate(tid, ip, 0, COMMIT_PWMAP);
421			memcpy(&jfs_ip->i_dirtable, temp_table,
422			       sizeof (temp_table));
423			goto clean_up;
424		}
425		tlck = txLock(tid, ip, mp, tlckDATA);
426		llck = (struct linelock *) & tlck->lock;
427		ASSERT(llck->index == 0);
428		lv = &llck->lv[0];
429
430		lv->offset = 0;
431		lv->length = 6;	/* tlckDATA slot size is 16 bytes */
432		llck->index++;
433
434		memcpy(mp->data, temp_table, sizeof(temp_table));
435
436		mark_metapage_dirty(mp);
437		release_metapage(mp);
438
439		/*
440		 * Logging is now directed by xtree tlocks
441		 */
442		clear_cflag(COMMIT_Dirtable, ip);
443	}
444
445	offset = (index - 2) * sizeof(struct dir_table_slot);
446	page_offset = offset & (PSIZE - 1);
447	blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage;
448	if (page_offset == 0) {
449		/*
450		 * This will be the beginning of a new page
451		 */
452		xaddr = 0;
453		if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) {
454			jfs_warn("add_index: xtInsert failed!");
455			goto clean_up;
456		}
457		ip->i_size += PSIZE;
458
459		if ((mp = get_index_page(ip, blkno)))
460			memset(mp->data, 0, PSIZE);	/* Just looks better */
461		else
462			xtTruncate(tid, ip, offset, COMMIT_PWMAP);
463	} else
464		mp = read_index_page(ip, blkno);
465
466	if (!mp) {
467		jfs_err("add_index: get/read_metapage failed!");
468		goto clean_up;
469	}
470
471	lock_index(tid, ip, mp, index);
472
473	dirtab_slot =
474	    (struct dir_table_slot *) ((char *) mp->data + page_offset);
475	dirtab_slot->flag = DIR_INDEX_VALID;
476	dirtab_slot->slot = slot;
477	DTSaddress(dirtab_slot, bn);
478
479	mark_metapage_dirty(mp);
480	release_metapage(mp);
481
482	return index;
483
484      clean_up:
485
486	jfs_ip->next_index--;
487
488	return 0;
489}
490
491/*
492 *	free_index()
493 *
494 *	Marks an entry to the directory index table as free.
495 */
496static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
497{
498	struct dir_table_slot *dirtab_slot;
499	s64 lblock;
500	struct metapage *mp = NULL;
501
502	dirtab_slot = find_index(ip, index, &mp, &lblock);
503
504	if (!dirtab_slot)
505		return;
506
507	dirtab_slot->flag = DIR_INDEX_FREE;
508	dirtab_slot->slot = dirtab_slot->addr1 = 0;
509	dirtab_slot->addr2 = cpu_to_le32(next);
510
511	if (mp) {
512		lock_index(tid, ip, mp, index);
513		mark_metapage_dirty(mp);
514		release_metapage(mp);
515	} else
516		set_cflag(COMMIT_Dirtable, ip);
517}
518
519/*
520 *	modify_index()
521 *
522 *	Changes an entry in the directory index table
523 */
524static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
525			 int slot, struct metapage ** mp, s64 *lblock)
526{
527	struct dir_table_slot *dirtab_slot;
528
529	dirtab_slot = find_index(ip, index, mp, lblock);
530
531	if (!dirtab_slot)
532		return;
533
534	DTSaddress(dirtab_slot, bn);
535	dirtab_slot->slot = slot;
536
537	if (*mp) {
538		lock_index(tid, ip, *mp, index);
539		mark_metapage_dirty(*mp);
540	} else
541		set_cflag(COMMIT_Dirtable, ip);
542}
543
544/*
545 *	read_index()
546 *
547 *	reads a directory table slot
548 */
549static int read_index(struct inode *ip, u32 index,
550		     struct dir_table_slot * dirtab_slot)
551{
552	s64 lblock;
553	struct metapage *mp = NULL;
554	struct dir_table_slot *slot;
555
556	slot = find_index(ip, index, &mp, &lblock);
557	if (!slot) {
558		return -EIO;
559	}
560
561	memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot));
562
563	if (mp)
564		release_metapage(mp);
565
566	return 0;
567}
568
569/*
570 *	dtSearch()
571 *
572 * function:
573 *	Search for the entry with specified key
574 *
575 * parameter:
576 *
577 * return: 0 - search result on stack, leaf page pinned;
578 *	   errno - I/O error
579 */
580int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
581	     struct btstack * btstack, int flag)
582{
583	int rc = 0;
584	int cmp = 1;		/* init for empty page */
585	s64 bn;
586	struct metapage *mp;
587	dtpage_t *p;
588	s8 *stbl;
589	int base, index, lim;
590	struct btframe *btsp;
591	pxd_t *pxd;
592	int psize = 288;	/* initial in-line directory */
593	ino_t inumber;
594	struct component_name ciKey;
595	struct super_block *sb = ip->i_sb;
596
597	ciKey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), GFP_NOFS);
598	if (!ciKey.name) {
599		rc = -ENOMEM;
600		goto dtSearch_Exit2;
601	}
602
603
604	/* uppercase search key for c-i directory */
605	UniStrcpy(ciKey.name, key->name);
606	ciKey.namlen = key->namlen;
607
608	/* only uppercase if case-insensitive support is on */
609	if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) {
610		ciToUpper(&ciKey);
611	}
612	BT_CLR(btstack);	/* reset stack */
613
614	/* init level count for max pages to split */
615	btstack->nsplit = 1;
616
617	/*
618	 *	search down tree from root:
619	 *
620	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
621	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
622	 *
623	 * if entry with search key K is not found
624	 * internal page search find the entry with largest key Ki
625	 * less than K which point to the child page to search;
626	 * leaf page search find the entry with smallest key Kj
627	 * greater than K so that the returned index is the position of
628	 * the entry to be shifted right for insertion of new entry.
629	 * for empty tree, search key is greater than any key of the tree.
630	 *
631	 * by convention, root bn = 0.
632	 */
633	for (bn = 0;;) {
634		/* get/pin the page to search */
635		DT_GETPAGE(ip, bn, mp, psize, p, rc);
636		if (rc)
637			goto dtSearch_Exit1;
638
639		/* get sorted entry table of the page */
640		stbl = DT_GETSTBL(p);
641
642		/*
643		 * binary search with search key K on the current page.
644		 */
645		for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
646			index = base + (lim >> 1);
647
648			if (p->header.flag & BT_LEAF) {
649				/* uppercase leaf name to compare */
650				cmp =
651				    ciCompare(&ciKey, p, stbl[index],
652					      JFS_SBI(sb)->mntflag);
653			} else {
654				/* router key is in uppercase */
655
656				cmp = dtCompare(&ciKey, p, stbl[index]);
657
658
659			}
660			if (cmp == 0) {
661				/*
662				 *	search hit
663				 */
664				/* search hit - leaf page:
665				 * return the entry found
666				 */
667				if (p->header.flag & BT_LEAF) {
668					inumber = le32_to_cpu(
669			((struct ldtentry *) & p->slot[stbl[index]])->inumber);
670
671					/*
672					 * search for JFS_LOOKUP
673					 */
674					if (flag == JFS_LOOKUP) {
675						*data = inumber;
676						rc = 0;
677						goto out;
678					}
679
680					/*
681					 * search for JFS_CREATE
682					 */
683					if (flag == JFS_CREATE) {
684						*data = inumber;
685						rc = -EEXIST;
686						goto out;
687					}
688
689					/*
690					 * search for JFS_REMOVE or JFS_RENAME
691					 */
692					if ((flag == JFS_REMOVE ||
693					     flag == JFS_RENAME) &&
694					    *data != inumber) {
695						rc = -ESTALE;
696						goto out;
697					}
698
699					/*
700					 * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME
701					 */
702					/* save search result */
703					*data = inumber;
704					btsp = btstack->top;
705					btsp->bn = bn;
706					btsp->index = index;
707					btsp->mp = mp;
708
709					rc = 0;
710					goto dtSearch_Exit1;
711				}
712
713				/* search hit - internal page:
714				 * descend/search its child page
715				 */
716				goto getChild;
717			}
718
719			if (cmp > 0) {
720				base = index + 1;
721				--lim;
722			}
723		}
724
725		/*
726		 *	search miss
727		 *
728		 * base is the smallest index with key (Kj) greater than
729		 * search key (K) and may be zero or (maxindex + 1) index.
730		 */
731		/*
732		 * search miss - leaf page
733		 *
734		 * return location of entry (base) where new entry with
735		 * search key K is to be inserted.
736		 */
737		if (p->header.flag & BT_LEAF) {
738			/*
739			 * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME
740			 */
741			if (flag == JFS_LOOKUP || flag == JFS_REMOVE ||
742			    flag == JFS_RENAME) {
743				rc = -ENOENT;
744				goto out;
745			}
746
747			/*
748			 * search for JFS_CREATE|JFS_FINDDIR:
749			 *
750			 * save search result
751			 */
752			*data = 0;
753			btsp = btstack->top;
754			btsp->bn = bn;
755			btsp->index = base;
756			btsp->mp = mp;
757
758			rc = 0;
759			goto dtSearch_Exit1;
760		}
761
762		/*
763		 * search miss - internal page
764		 *
765		 * if base is non-zero, decrement base by one to get the parent
766		 * entry of the child page to search.
767		 */
768		index = base ? base - 1 : base;
769
770		/*
771		 * go down to child page
772		 */
773	      getChild:
774		/* update max. number of pages to split */
775		if (BT_STACK_FULL(btstack)) {
776			/* Something's corrupted, mark filesystem dirty so
777			 * chkdsk will fix it.
778			 */
779			jfs_error(sb, "stack overrun!\n");
780			BT_STACK_DUMP(btstack);
781			rc = -EIO;
782			goto out;
783		}
784		btstack->nsplit++;
785
786		/* push (bn, index) of the parent page/entry */
787		BT_PUSH(btstack, bn, index);
788
789		/* get the child page block number */
790		pxd = (pxd_t *) & p->slot[stbl[index]];
791		bn = addressPXD(pxd);
792		psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
793
794		/* unpin the parent page */
795		DT_PUTPAGE(mp);
796	}
797
798      out:
799	DT_PUTPAGE(mp);
800
801      dtSearch_Exit1:
802
803	kfree(ciKey.name);
804
805      dtSearch_Exit2:
806
807	return rc;
808}
809
810
811/*
812 *	dtInsert()
813 *
814 * function: insert an entry to directory tree
815 *
816 * parameter:
817 *
818 * return: 0 - success;
819 *	   errno - failure;
820 */
821int dtInsert(tid_t tid, struct inode *ip,
822	 struct component_name * name, ino_t * fsn, struct btstack * btstack)
823{
824	int rc = 0;
825	struct metapage *mp;	/* meta-page buffer */
826	dtpage_t *p;		/* base B+-tree index page */
827	s64 bn;
828	int index;
829	struct dtsplit split;	/* split information */
830	ddata_t data;
831	struct dt_lock *dtlck;
832	int n;
833	struct tlock *tlck;
834	struct lv *lv;
835
836	/*
837	 *	retrieve search result
838	 *
839	 * dtSearch() returns (leaf page pinned, index at which to insert).
840	 * n.b. dtSearch() may return index of (maxindex + 1) of
841	 * the full page.
842	 */
843	DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
844
845	/*
846	 *	insert entry for new key
847	 */
848	if (DO_INDEX(ip)) {
849		if (JFS_IP(ip)->next_index == DIREND) {
850			DT_PUTPAGE(mp);
851			return -EMLINK;
852		}
853		n = NDTLEAF(name->namlen);
854		data.leaf.tid = tid;
855		data.leaf.ip = ip;
856	} else {
857		n = NDTLEAF_LEGACY(name->namlen);
858		data.leaf.ip = NULL;	/* signifies legacy directory format */
859	}
860	data.leaf.ino = *fsn;
861
862	/*
863	 *	leaf page does not have enough room for new entry:
864	 *
865	 *	extend/split the leaf page;
866	 *
867	 * dtSplitUp() will insert the entry and unpin the leaf page.
868	 */
869	if (n > p->header.freecnt) {
870		split.mp = mp;
871		split.index = index;
872		split.nslot = n;
873		split.key = name;
874		split.data = &data;
875		rc = dtSplitUp(tid, ip, &split, btstack);
876		return rc;
877	}
878
879	/*
880	 *	leaf page does have enough room for new entry:
881	 *
882	 *	insert the new data entry into the leaf page;
883	 */
884	BT_MARK_DIRTY(mp, ip);
885	/*
886	 * acquire a transaction lock on the leaf page
887	 */
888	tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
889	dtlck = (struct dt_lock *) & tlck->lock;
890	ASSERT(dtlck->index == 0);
891	lv = & dtlck->lv[0];
892
893	/* linelock header */
894	lv->offset = 0;
895	lv->length = 1;
896	dtlck->index++;
897
898	dtInsertEntry(p, index, name, &data, &dtlck);
899
900	/* linelock stbl of non-root leaf page */
901	if (!(p->header.flag & BT_ROOT)) {
902		if (dtlck->index >= dtlck->maxcnt)
903			dtlck = (struct dt_lock *) txLinelock(dtlck);
904		lv = & dtlck->lv[dtlck->index];
905		n = index >> L2DTSLOTSIZE;
906		lv->offset = p->header.stblindex + n;
907		lv->length =
908		    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
909		dtlck->index++;
910	}
911
912	/* unpin the leaf page */
913	DT_PUTPAGE(mp);
914
915	return 0;
916}
917
918
919/*
920 *	dtSplitUp()
921 *
922 * function: propagate insertion bottom up;
923 *
924 * parameter:
925 *
926 * return: 0 - success;
927 *	   errno - failure;
928 *	leaf page unpinned;
929 */
930static int dtSplitUp(tid_t tid,
931	  struct inode *ip, struct dtsplit * split, struct btstack * btstack)
932{
933	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
934	int rc = 0;
935	struct metapage *smp;
936	dtpage_t *sp;		/* split page */
937	struct metapage *rmp;
938	dtpage_t *rp;		/* new right page split from sp */
939	pxd_t rpxd;		/* new right page extent descriptor */
940	struct metapage *lmp;
941	dtpage_t *lp;		/* left child page */
942	int skip;		/* index of entry of insertion */
943	struct btframe *parent;	/* parent page entry on traverse stack */
944	s64 xaddr, nxaddr;
945	int xlen, xsize;
946	struct pxdlist pxdlist;
947	pxd_t *pxd;
948	struct component_name key = { 0, NULL };
949	ddata_t *data = split->data;
950	int n;
951	struct dt_lock *dtlck;
952	struct tlock *tlck;
953	struct lv *lv;
954	int quota_allocation = 0;
955
956	/* get split page */
957	smp = split->mp;
958	sp = DT_PAGE(ip, smp);
959
960	key.name = kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), GFP_NOFS);
961	if (!key.name) {
962		DT_PUTPAGE(smp);
963		rc = -ENOMEM;
964		goto dtSplitUp_Exit;
965	}
966
967	/*
968	 *	split leaf page
969	 *
970	 * The split routines insert the new entry, and
971	 * acquire txLock as appropriate.
972	 */
973	/*
974	 *	split root leaf page:
975	 */
976	if (sp->header.flag & BT_ROOT) {
977		/*
978		 * allocate a single extent child page
979		 */
980		xlen = 1;
981		n = sbi->bsize >> L2DTSLOTSIZE;
982		n -= (n + 31) >> L2DTSLOTSIZE;	/* stbl size */
983		n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */
984		if (n <= split->nslot)
985			xlen++;
986		if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) {
987			DT_PUTPAGE(smp);
988			goto freeKeyName;
989		}
990
991		pxdlist.maxnpxd = 1;
992		pxdlist.npxd = 0;
993		pxd = &pxdlist.pxd[0];
994		PXDaddress(pxd, xaddr);
995		PXDlength(pxd, xlen);
996		split->pxdlist = &pxdlist;
997		rc = dtSplitRoot(tid, ip, split, &rmp);
998
999		if (rc)
1000			dbFree(ip, xaddr, xlen);
1001		else
1002			DT_PUTPAGE(rmp);
1003
1004		DT_PUTPAGE(smp);
1005
1006		if (!DO_INDEX(ip))
1007			ip->i_size = xlen << sbi->l2bsize;
1008
1009		goto freeKeyName;
1010	}
1011
1012	/*
1013	 *	extend first leaf page
1014	 *
1015	 * extend the 1st extent if less than buffer page size
1016	 * (dtExtendPage() reurns leaf page unpinned)
1017	 */
1018	pxd = &sp->header.self;
1019	xlen = lengthPXD(pxd);
1020	xsize = xlen << sbi->l2bsize;
1021	if (xsize < PSIZE) {
1022		xaddr = addressPXD(pxd);
1023		n = xsize >> L2DTSLOTSIZE;
1024		n -= (n + 31) >> L2DTSLOTSIZE;	/* stbl size */
1025		if ((n + sp->header.freecnt) <= split->nslot)
1026			n = xlen + (xlen << 1);
1027		else
1028			n = xlen;
1029
1030		/* Allocate blocks to quota. */
1031		rc = dquot_alloc_block(ip, n);
1032		if (rc)
1033			goto extendOut;
1034		quota_allocation += n;
1035
1036		if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
1037				    (s64) n, &nxaddr)))
1038			goto extendOut;
1039
1040		pxdlist.maxnpxd = 1;
1041		pxdlist.npxd = 0;
1042		pxd = &pxdlist.pxd[0];
1043		PXDaddress(pxd, nxaddr);
1044		PXDlength(pxd, xlen + n);
1045		split->pxdlist = &pxdlist;
1046		if ((rc = dtExtendPage(tid, ip, split, btstack))) {
1047			nxaddr = addressPXD(pxd);
1048			if (xaddr != nxaddr) {
1049				/* free relocated extent */
1050				xlen = lengthPXD(pxd);
1051				dbFree(ip, nxaddr, (s64) xlen);
1052			} else {
1053				/* free extended delta */
1054				xlen = lengthPXD(pxd) - n;
1055				xaddr = addressPXD(pxd) + xlen;
1056				dbFree(ip, xaddr, (s64) n);
1057			}
1058		} else if (!DO_INDEX(ip))
1059			ip->i_size = lengthPXD(pxd) << sbi->l2bsize;
1060
1061
1062	      extendOut:
1063		DT_PUTPAGE(smp);
1064		goto freeKeyName;
1065	}
1066
1067	/*
1068	 *	split leaf page <sp> into <sp> and a new right page <rp>.
1069	 *
1070	 * return <rp> pinned and its extent descriptor <rpxd>
1071	 */
1072	/*
1073	 * allocate new directory page extent and
1074	 * new index page(s) to cover page split(s)
1075	 *
1076	 * allocation hint: ?
1077	 */
1078	n = btstack->nsplit;
1079	pxdlist.maxnpxd = pxdlist.npxd = 0;
1080	xlen = sbi->nbperpage;
1081	for (pxd = pxdlist.pxd; n > 0; n--, pxd++) {
1082		if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) {
1083			PXDaddress(pxd, xaddr);
1084			PXDlength(pxd, xlen);
1085			pxdlist.maxnpxd++;
1086			continue;
1087		}
1088
1089		DT_PUTPAGE(smp);
1090
1091		/* undo allocation */
1092		goto splitOut;
1093	}
1094
1095	split->pxdlist = &pxdlist;
1096	if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) {
1097		DT_PUTPAGE(smp);
1098
1099		/* undo allocation */
1100		goto splitOut;
1101	}
1102
1103	if (!DO_INDEX(ip))
1104		ip->i_size += PSIZE;
1105
1106	/*
1107	 * propagate up the router entry for the leaf page just split
1108	 *
1109	 * insert a router entry for the new page into the parent page,
1110	 * propagate the insert/split up the tree by walking back the stack
1111	 * of (bn of parent page, index of child page entry in parent page)
1112	 * that were traversed during the search for the page that split.
1113	 *
1114	 * the propagation of insert/split up the tree stops if the root
1115	 * splits or the page inserted into doesn't have to split to hold
1116	 * the new entry.
1117	 *
1118	 * the parent entry for the split page remains the same, and
1119	 * a new entry is inserted at its right with the first key and
1120	 * block number of the new right page.
1121	 *
1122	 * There are a maximum of 4 pages pinned at any time:
1123	 * two children, left parent and right parent (when the parent splits).
1124	 * keep the child pages pinned while working on the parent.
1125	 * make sure that all pins are released at exit.
1126	 */
1127	while ((parent = BT_POP(btstack)) != NULL) {
1128		/* parent page specified by stack frame <parent> */
1129
1130		/* keep current child pages (<lp>, <rp>) pinned */
1131		lmp = smp;
1132		lp = sp;
1133
1134		/*
1135		 * insert router entry in parent for new right child page <rp>
1136		 */
1137		/* get the parent page <sp> */
1138		DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
1139		if (rc) {
1140			DT_PUTPAGE(lmp);
1141			DT_PUTPAGE(rmp);
1142			goto splitOut;
1143		}
1144
1145		/*
1146		 * The new key entry goes ONE AFTER the index of parent entry,
1147		 * because the split was to the right.
1148		 */
1149		skip = parent->index + 1;
1150
1151		/*
1152		 * compute the key for the router entry
1153		 *
1154		 * key suffix compression:
1155		 * for internal pages that have leaf pages as children,
1156		 * retain only what's needed to distinguish between
1157		 * the new entry and the entry on the page to its left.
1158		 * If the keys compare equal, retain the entire key.
1159		 *
1160		 * note that compression is performed only at computing
1161		 * router key at the lowest internal level.
1162		 * further compression of the key between pairs of higher
1163		 * level internal pages loses too much information and
1164		 * the search may fail.
1165		 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,}
1166		 * results in two adjacent parent entries (a)(xx).
1167		 * if split occurs between these two entries, and
1168		 * if compression is applied, the router key of parent entry
1169		 * of right page (x) will divert search for x into right
1170		 * subtree and miss x in the left subtree.)
1171		 *
1172		 * the entire key must be retained for the next-to-leftmost
1173		 * internal key at any level of the tree, or search may fail
1174		 * (e.g., ?)
1175		 */
1176		switch (rp->header.flag & BT_TYPE) {
1177		case BT_LEAF:
1178			/*
1179			 * compute the length of prefix for suffix compression
1180			 * between last entry of left page and first entry
1181			 * of right page
1182			 */
1183			if ((sp->header.flag & BT_ROOT && skip > 1) ||
1184			    sp->header.prev != 0 || skip > 1) {
1185				/* compute uppercase router prefix key */
1186				rc = ciGetLeafPrefixKey(lp,
1187							lp->header.nextindex-1,
1188							rp, 0, &key,
1189							sbi->mntflag);
1190				if (rc) {
1191					DT_PUTPAGE(lmp);
1192					DT_PUTPAGE(rmp);
1193					DT_PUTPAGE(smp);
1194					goto splitOut;
1195				}
1196			} else {
1197				/* next to leftmost entry of
1198				   lowest internal level */
1199
1200				/* compute uppercase router key */
1201				dtGetKey(rp, 0, &key, sbi->mntflag);
1202				key.name[key.namlen] = 0;
1203
1204				if ((sbi->mntflag & JFS_OS2) == JFS_OS2)
1205					ciToUpper(&key);
1206			}
1207
1208			n = NDTINTERNAL(key.namlen);
1209			break;
1210
1211		case BT_INTERNAL:
1212			dtGetKey(rp, 0, &key, sbi->mntflag);
1213			n = NDTINTERNAL(key.namlen);
1214			break;
1215
1216		default:
1217			jfs_err("dtSplitUp(): UFO!");
1218			break;
1219		}
1220
1221		/* unpin left child page */
1222		DT_PUTPAGE(lmp);
1223
1224		/*
1225		 * compute the data for the router entry
1226		 */
1227		data->xd = rpxd;	/* child page xd */
1228
1229		/*
1230		 * parent page is full - split the parent page
1231		 */
1232		if (n > sp->header.freecnt) {
1233			/* init for parent page split */
1234			split->mp = smp;
1235			split->index = skip;	/* index at insert */
1236			split->nslot = n;
1237			split->key = &key;
1238			/* split->data = data; */
1239
1240			/* unpin right child page */
1241			DT_PUTPAGE(rmp);
1242
1243			/* The split routines insert the new entry,
1244			 * acquire txLock as appropriate.
1245			 * return <rp> pinned and its block number <rbn>.
1246			 */
1247			rc = (sp->header.flag & BT_ROOT) ?
1248			    dtSplitRoot(tid, ip, split, &rmp) :
1249			    dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd);
1250			if (rc) {
1251				DT_PUTPAGE(smp);
1252				goto splitOut;
1253			}
1254
1255			/* smp and rmp are pinned */
1256		}
1257		/*
1258		 * parent page is not full - insert router entry in parent page
1259		 */
1260		else {
1261			BT_MARK_DIRTY(smp, ip);
1262			/*
1263			 * acquire a transaction lock on the parent page
1264			 */
1265			tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1266			dtlck = (struct dt_lock *) & tlck->lock;
1267			ASSERT(dtlck->index == 0);
1268			lv = & dtlck->lv[0];
1269
1270			/* linelock header */
1271			lv->offset = 0;
1272			lv->length = 1;
1273			dtlck->index++;
1274
1275			/* linelock stbl of non-root parent page */
1276			if (!(sp->header.flag & BT_ROOT)) {
1277				lv++;
1278				n = skip >> L2DTSLOTSIZE;
1279				lv->offset = sp->header.stblindex + n;
1280				lv->length =
1281				    ((sp->header.nextindex -
1282				      1) >> L2DTSLOTSIZE) - n + 1;
1283				dtlck->index++;
1284			}
1285
1286			dtInsertEntry(sp, skip, &key, data, &dtlck);
1287
1288			/* exit propagate up */
1289			break;
1290		}
1291	}
1292
1293	/* unpin current split and its right page */
1294	DT_PUTPAGE(smp);
1295	DT_PUTPAGE(rmp);
1296
1297	/*
1298	 * free remaining extents allocated for split
1299	 */
1300      splitOut:
1301	n = pxdlist.npxd;
1302	pxd = &pxdlist.pxd[n];
1303	for (; n < pxdlist.maxnpxd; n++, pxd++)
1304		dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd));
1305
1306      freeKeyName:
1307	kfree(key.name);
1308
1309	/* Rollback quota allocation */
1310	if (rc && quota_allocation)
1311		dquot_free_block(ip, quota_allocation);
1312
1313      dtSplitUp_Exit:
1314
1315	return rc;
1316}
1317
1318
1319/*
1320 *	dtSplitPage()
1321 *
1322 * function: Split a non-root page of a btree.
1323 *
1324 * parameter:
1325 *
1326 * return: 0 - success;
1327 *	   errno - failure;
1328 *	return split and new page pinned;
1329 */
1330static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1331	    struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp)
1332{
1333	int rc = 0;
1334	struct metapage *smp;
1335	dtpage_t *sp;
1336	struct metapage *rmp;
1337	dtpage_t *rp;		/* new right page allocated */
1338	s64 rbn;		/* new right page block number */
1339	struct metapage *mp;
1340	dtpage_t *p;
1341	s64 nextbn;
1342	struct pxdlist *pxdlist;
1343	pxd_t *pxd;
1344	int skip, nextindex, half, left, nxt, off, si;
1345	struct ldtentry *ldtentry;
1346	struct idtentry *idtentry;
1347	u8 *stbl;
1348	struct dtslot *f;
1349	int fsi, stblsize;
1350	int n;
1351	struct dt_lock *sdtlck, *rdtlck;
1352	struct tlock *tlck;
1353	struct dt_lock *dtlck;
1354	struct lv *slv, *rlv, *lv;
1355
1356	/* get split page */
1357	smp = split->mp;
1358	sp = DT_PAGE(ip, smp);
1359
1360	/*
1361	 * allocate the new right page for the split
1362	 */
1363	pxdlist = split->pxdlist;
1364	pxd = &pxdlist->pxd[pxdlist->npxd];
1365	pxdlist->npxd++;
1366	rbn = addressPXD(pxd);
1367	rmp = get_metapage(ip, rbn, PSIZE, 1);
1368	if (rmp == NULL)
1369		return -EIO;
1370
1371	/* Allocate blocks to quota. */
1372	rc = dquot_alloc_block(ip, lengthPXD(pxd));
1373	if (rc) {
1374		release_metapage(rmp);
1375		return rc;
1376	}
1377
1378	jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
1379
1380	BT_MARK_DIRTY(rmp, ip);
1381	/*
1382	 * acquire a transaction lock on the new right page
1383	 */
1384	tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1385	rdtlck = (struct dt_lock *) & tlck->lock;
1386
1387	rp = (dtpage_t *) rmp->data;
1388	*rpp = rp;
1389	rp->header.self = *pxd;
1390
1391	BT_MARK_DIRTY(smp, ip);
1392	/*
1393	 * acquire a transaction lock on the split page
1394	 *
1395	 * action:
1396	 */
1397	tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
1398	sdtlck = (struct dt_lock *) & tlck->lock;
1399
1400	/* linelock header of split page */
1401	ASSERT(sdtlck->index == 0);
1402	slv = & sdtlck->lv[0];
1403	slv->offset = 0;
1404	slv->length = 1;
1405	sdtlck->index++;
1406
1407	/*
1408	 * initialize/update sibling pointers between sp and rp
1409	 */
1410	nextbn = le64_to_cpu(sp->header.next);
1411	rp->header.next = cpu_to_le64(nextbn);
1412	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1413	sp->header.next = cpu_to_le64(rbn);
1414
1415	/*
1416	 * initialize new right page
1417	 */
1418	rp->header.flag = sp->header.flag;
1419
1420	/* compute sorted entry table at start of extent data area */
1421	rp->header.nextindex = 0;
1422	rp->header.stblindex = 1;
1423
1424	n = PSIZE >> L2DTSLOTSIZE;
1425	rp->header.maxslot = n;
1426	stblsize = (n + 31) >> L2DTSLOTSIZE;	/* in unit of slot */
1427
1428	/* init freelist */
1429	fsi = rp->header.stblindex + stblsize;
1430	rp->header.freelist = fsi;
1431	rp->header.freecnt = rp->header.maxslot - fsi;
1432
1433	/*
1434	 *	sequential append at tail: append without split
1435	 *
1436	 * If splitting the last page on a level because of appending
1437	 * a entry to it (skip is maxentry), it's likely that the access is
1438	 * sequential. Adding an empty page on the side of the level is less
1439	 * work and can push the fill factor much higher than normal.
1440	 * If we're wrong it's no big deal, we'll just do the split the right
1441	 * way next time.
1442	 * (It may look like it's equally easy to do a similar hack for
1443	 * reverse sorted data, that is, split the tree left,
1444	 * but it's not. Be my guest.)
1445	 */
1446	if (nextbn == 0 && split->index == sp->header.nextindex) {
1447		/* linelock header + stbl (first slot) of new page */
1448		rlv = & rdtlck->lv[rdtlck->index];
1449		rlv->offset = 0;
1450		rlv->length = 2;
1451		rdtlck->index++;
1452
1453		/*
1454		 * initialize freelist of new right page
1455		 */
1456		f = &rp->slot[fsi];
1457		for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1458			f->next = fsi;
1459		f->next = -1;
1460
1461		/* insert entry at the first entry of the new right page */
1462		dtInsertEntry(rp, 0, split->key, split->data, &rdtlck);
1463
1464		goto out;
1465	}
1466
1467	/*
1468	 *	non-sequential insert (at possibly middle page)
1469	 */
1470
1471	/*
1472	 * update prev pointer of previous right sibling page;
1473	 */
1474	if (nextbn != 0) {
1475		DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
1476		if (rc) {
1477			discard_metapage(rmp);
1478			return rc;
1479		}
1480
1481		BT_MARK_DIRTY(mp, ip);
1482		/*
1483		 * acquire a transaction lock on the next page
1484		 */
1485		tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
1486		jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p",
1487			tlck, ip, mp);
1488		dtlck = (struct dt_lock *) & tlck->lock;
1489
1490		/* linelock header of previous right sibling page */
1491		lv = & dtlck->lv[dtlck->index];
1492		lv->offset = 0;
1493		lv->length = 1;
1494		dtlck->index++;
1495
1496		p->header.prev = cpu_to_le64(rbn);
1497
1498		DT_PUTPAGE(mp);
1499	}
1500
1501	/*
1502	 * split the data between the split and right pages.
1503	 */
1504	skip = split->index;
1505	half = (PSIZE >> L2DTSLOTSIZE) >> 1;	/* swag */
1506	left = 0;
1507
1508	/*
1509	 *	compute fill factor for split pages
1510	 *
1511	 * <nxt> traces the next entry to move to rp
1512	 * <off> traces the next entry to stay in sp
1513	 */
1514	stbl = (u8 *) & sp->slot[sp->header.stblindex];
1515	nextindex = sp->header.nextindex;
1516	for (nxt = off = 0; nxt < nextindex; ++off) {
1517		if (off == skip)
1518			/* check for fill factor with new entry size */
1519			n = split->nslot;
1520		else {
1521			si = stbl[nxt];
1522			switch (sp->header.flag & BT_TYPE) {
1523			case BT_LEAF:
1524				ldtentry = (struct ldtentry *) & sp->slot[si];
1525				if (DO_INDEX(ip))
1526					n = NDTLEAF(ldtentry->namlen);
1527				else
1528					n = NDTLEAF_LEGACY(ldtentry->
1529							   namlen);
1530				break;
1531
1532			case BT_INTERNAL:
1533				idtentry = (struct idtentry *) & sp->slot[si];
1534				n = NDTINTERNAL(idtentry->namlen);
1535				break;
1536
1537			default:
1538				break;
1539			}
1540
1541			++nxt;	/* advance to next entry to move in sp */
1542		}
1543
1544		left += n;
1545		if (left >= half)
1546			break;
1547	}
1548
1549	/* <nxt> poins to the 1st entry to move */
1550
1551	/*
1552	 *	move entries to right page
1553	 *
1554	 * dtMoveEntry() initializes rp and reserves entry for insertion
1555	 *
1556	 * split page moved out entries are linelocked;
1557	 * new/right page moved in entries are linelocked;
1558	 */
1559	/* linelock header + stbl of new right page */
1560	rlv = & rdtlck->lv[rdtlck->index];
1561	rlv->offset = 0;
1562	rlv->length = 5;
1563	rdtlck->index++;
1564
1565	dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip));
1566
1567	sp->header.nextindex = nxt;
1568
1569	/*
1570	 * finalize freelist of new right page
1571	 */
1572	fsi = rp->header.freelist;
1573	f = &rp->slot[fsi];
1574	for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1575		f->next = fsi;
1576	f->next = -1;
1577
1578	/*
1579	 * Update directory index table for entries now in right page
1580	 */
1581	if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1582		s64 lblock;
1583
1584		mp = NULL;
1585		stbl = DT_GETSTBL(rp);
1586		for (n = 0; n < rp->header.nextindex; n++) {
1587			ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
1588			modify_index(tid, ip, le32_to_cpu(ldtentry->index),
1589				     rbn, n, &mp, &lblock);
1590		}
1591		if (mp)
1592			release_metapage(mp);
1593	}
1594
1595	/*
1596	 * the skipped index was on the left page,
1597	 */
1598	if (skip <= off) {
1599		/* insert the new entry in the split page */
1600		dtInsertEntry(sp, skip, split->key, split->data, &sdtlck);
1601
1602		/* linelock stbl of split page */
1603		if (sdtlck->index >= sdtlck->maxcnt)
1604			sdtlck = (struct dt_lock *) txLinelock(sdtlck);
1605		slv = & sdtlck->lv[sdtlck->index];
1606		n = skip >> L2DTSLOTSIZE;
1607		slv->offset = sp->header.stblindex + n;
1608		slv->length =
1609		    ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
1610		sdtlck->index++;
1611	}
1612	/*
1613	 * the skipped index was on the right page,
1614	 */
1615	else {
1616		/* adjust the skip index to reflect the new position */
1617		skip -= nxt;
1618
1619		/* insert the new entry in the right page */
1620		dtInsertEntry(rp, skip, split->key, split->data, &rdtlck);
1621	}
1622
1623      out:
1624	*rmpp = rmp;
1625	*rpxdp = *pxd;
1626
1627	return rc;
1628}
1629
1630
1631/*
1632 *	dtExtendPage()
1633 *
1634 * function: extend 1st/only directory leaf page
1635 *
1636 * parameter:
1637 *
1638 * return: 0 - success;
1639 *	   errno - failure;
1640 *	return extended page pinned;
1641 */
1642static int dtExtendPage(tid_t tid,
1643	     struct inode *ip, struct dtsplit * split, struct btstack * btstack)
1644{
1645	struct super_block *sb = ip->i_sb;
1646	int rc;
1647	struct metapage *smp, *pmp, *mp;
1648	dtpage_t *sp, *pp;
1649	struct pxdlist *pxdlist;
1650	pxd_t *pxd, *tpxd;
1651	int xlen, xsize;
1652	int newstblindex, newstblsize;
1653	int oldstblindex, oldstblsize;
1654	int fsi, last;
1655	struct dtslot *f;
1656	struct btframe *parent;
1657	int n;
1658	struct dt_lock *dtlck;
1659	s64 xaddr, txaddr;
1660	struct tlock *tlck;
1661	struct pxd_lock *pxdlock;
1662	struct lv *lv;
1663	uint type;
1664	struct ldtentry *ldtentry;
1665	u8 *stbl;
1666
1667	/* get page to extend */
1668	smp = split->mp;
1669	sp = DT_PAGE(ip, smp);
1670
1671	/* get parent/root page */
1672	parent = BT_POP(btstack);
1673	DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc);
1674	if (rc)
1675		return (rc);
1676
1677	/*
1678	 *	extend the extent
1679	 */
1680	pxdlist = split->pxdlist;
1681	pxd = &pxdlist->pxd[pxdlist->npxd];
1682	pxdlist->npxd++;
1683
1684	xaddr = addressPXD(pxd);
1685	tpxd = &sp->header.self;
1686	txaddr = addressPXD(tpxd);
1687	/* in-place extension */
1688	if (xaddr == txaddr) {
1689		type = tlckEXTEND;
1690	}
1691	/* relocation */
1692	else {
1693		type = tlckNEW;
1694
1695		/* save moved extent descriptor for later free */
1696		tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE);
1697		pxdlock = (struct pxd_lock *) & tlck->lock;
1698		pxdlock->flag = mlckFREEPXD;
1699		pxdlock->pxd = sp->header.self;
1700		pxdlock->index = 1;
1701
1702		/*
1703		 * Update directory index table to reflect new page address
1704		 */
1705		if (DO_INDEX(ip)) {
1706			s64 lblock;
1707
1708			mp = NULL;
1709			stbl = DT_GETSTBL(sp);
1710			for (n = 0; n < sp->header.nextindex; n++) {
1711				ldtentry =
1712				    (struct ldtentry *) & sp->slot[stbl[n]];
1713				modify_index(tid, ip,
1714					     le32_to_cpu(ldtentry->index),
1715					     xaddr, n, &mp, &lblock);
1716			}
1717			if (mp)
1718				release_metapage(mp);
1719		}
1720	}
1721
1722	/*
1723	 *	extend the page
1724	 */
1725	sp->header.self = *pxd;
1726
1727	jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp);
1728
1729	BT_MARK_DIRTY(smp, ip);
1730	/*
1731	 * acquire a transaction lock on the extended/leaf page
1732	 */
1733	tlck = txLock(tid, ip, smp, tlckDTREE | type);
1734	dtlck = (struct dt_lock *) & tlck->lock;
1735	lv = & dtlck->lv[0];
1736
1737	/* update buffer extent descriptor of extended page */
1738	xlen = lengthPXD(pxd);
1739	xsize = xlen << JFS_SBI(sb)->l2bsize;
1740
1741	/*
1742	 * copy old stbl to new stbl at start of extended area
1743	 */
1744	oldstblindex = sp->header.stblindex;
1745	oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE;
1746	newstblindex = sp->header.maxslot;
1747	n = xsize >> L2DTSLOTSIZE;
1748	newstblsize = (n + 31) >> L2DTSLOTSIZE;
1749	memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex],
1750	       sp->header.nextindex);
1751
1752	/*
1753	 * in-line extension: linelock old area of extended page
1754	 */
1755	if (type == tlckEXTEND) {
1756		/* linelock header */
1757		lv->offset = 0;
1758		lv->length = 1;
1759		dtlck->index++;
1760		lv++;
1761
1762		/* linelock new stbl of extended page */
1763		lv->offset = newstblindex;
1764		lv->length = newstblsize;
1765	}
1766	/*
1767	 * relocation: linelock whole relocated area
1768	 */
1769	else {
1770		lv->offset = 0;
1771		lv->length = sp->header.maxslot + newstblsize;
1772	}
1773
1774	dtlck->index++;
1775
1776	sp->header.maxslot = n;
1777	sp->header.stblindex = newstblindex;
1778	/* sp->header.nextindex remains the same */
1779
1780	/*
1781	 * add old stbl region at head of freelist
1782	 */
1783	fsi = oldstblindex;
1784	f = &sp->slot[fsi];
1785	last = sp->header.freelist;
1786	for (n = 0; n < oldstblsize; n++, fsi++, f++) {
1787		f->next = last;
1788		last = fsi;
1789	}
1790	sp->header.freelist = last;
1791	sp->header.freecnt += oldstblsize;
1792
1793	/*
1794	 * append free region of newly extended area at tail of freelist
1795	 */
1796	/* init free region of newly extended area */
1797	fsi = n = newstblindex + newstblsize;
1798	f = &sp->slot[fsi];
1799	for (fsi++; fsi < sp->header.maxslot; f++, fsi++)
1800		f->next = fsi;
1801	f->next = -1;
1802
1803	/* append new free region at tail of old freelist */
1804	fsi = sp->header.freelist;
1805	if (fsi == -1)
1806		sp->header.freelist = n;
1807	else {
1808		do {
1809			f = &sp->slot[fsi];
1810			fsi = f->next;
1811		} while (fsi != -1);
1812
1813		f->next = n;
1814	}
1815
1816	sp->header.freecnt += sp->header.maxslot - n;
1817
1818	/*
1819	 * insert the new entry
1820	 */
1821	dtInsertEntry(sp, split->index, split->key, split->data, &dtlck);
1822
1823	BT_MARK_DIRTY(pmp, ip);
1824	/*
1825	 * linelock any freeslots residing in old extent
1826	 */
1827	if (type == tlckEXTEND) {
1828		n = sp->header.maxslot >> 2;
1829		if (sp->header.freelist < n)
1830			dtLinelockFreelist(sp, n, &dtlck);
1831	}
1832
1833	/*
1834	 *	update parent entry on the parent/root page
1835	 */
1836	/*
1837	 * acquire a transaction lock on the parent/root page
1838	 */
1839	tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
1840	dtlck = (struct dt_lock *) & tlck->lock;
1841	lv = & dtlck->lv[dtlck->index];
1842
1843	/* linelock parent entry - 1st slot */
1844	lv->offset = 1;
1845	lv->length = 1;
1846	dtlck->index++;
1847
1848	/* update the parent pxd for page extension */
1849	tpxd = (pxd_t *) & pp->slot[1];
1850	*tpxd = *pxd;
1851
1852	DT_PUTPAGE(pmp);
1853	return 0;
1854}
1855
1856
1857/*
1858 *	dtSplitRoot()
1859 *
1860 * function:
1861 *	split the full root page into
1862 *	original/root/split page and new right page
1863 *	i.e., root remains fixed in tree anchor (inode) and
1864 *	the root is copied to a single new right child page
1865 *	since root page << non-root page, and
1866 *	the split root page contains a single entry for the
1867 *	new right child page.
1868 *
1869 * parameter:
1870 *
1871 * return: 0 - success;
1872 *	   errno - failure;
1873 *	return new page pinned;
1874 */
1875static int dtSplitRoot(tid_t tid,
1876	    struct inode *ip, struct dtsplit * split, struct metapage ** rmpp)
1877{
1878	struct super_block *sb = ip->i_sb;
1879	struct metapage *smp;
1880	dtroot_t *sp;
1881	struct metapage *rmp;
1882	dtpage_t *rp;
1883	s64 rbn;
1884	int xlen;
1885	int xsize;
1886	struct dtslot *f;
1887	s8 *stbl;
1888	int fsi, stblsize, n;
1889	struct idtentry *s;
1890	pxd_t *ppxd;
1891	struct pxdlist *pxdlist;
1892	pxd_t *pxd;
1893	struct dt_lock *dtlck;
1894	struct tlock *tlck;
1895	struct lv *lv;
1896	int rc;
1897
1898	/* get split root page */
1899	smp = split->mp;
1900	sp = &JFS_IP(ip)->i_dtroot;
1901
1902	/*
1903	 *	allocate/initialize a single (right) child page
1904	 *
1905	 * N.B. at first split, a one (or two) block to fit new entry
1906	 * is allocated; at subsequent split, a full page is allocated;
1907	 */
1908	pxdlist = split->pxdlist;
1909	pxd = &pxdlist->pxd[pxdlist->npxd];
1910	pxdlist->npxd++;
1911	rbn = addressPXD(pxd);
1912	xlen = lengthPXD(pxd);
1913	xsize = xlen << JFS_SBI(sb)->l2bsize;
1914	rmp = get_metapage(ip, rbn, xsize, 1);
1915	if (!rmp)
1916		return -EIO;
1917
1918	rp = rmp->data;
1919
1920	/* Allocate blocks to quota. */
1921	rc = dquot_alloc_block(ip, lengthPXD(pxd));
1922	if (rc) {
1923		release_metapage(rmp);
1924		return rc;
1925	}
1926
1927	BT_MARK_DIRTY(rmp, ip);
1928	/*
1929	 * acquire a transaction lock on the new right page
1930	 */
1931	tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
1932	dtlck = (struct dt_lock *) & tlck->lock;
1933
1934	rp->header.flag =
1935	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
1936	rp->header.self = *pxd;
1937
1938	/* initialize sibling pointers */
1939	rp->header.next = 0;
1940	rp->header.prev = 0;
1941
1942	/*
1943	 *	move in-line root page into new right page extent
1944	 */
1945	/* linelock header + copied entries + new stbl (1st slot) in new page */
1946	ASSERT(dtlck->index == 0);
1947	lv = & dtlck->lv[0];
1948	lv->offset = 0;
1949	lv->length = 10;	/* 1 + 8 + 1 */
1950	dtlck->index++;
1951
1952	n = xsize >> L2DTSLOTSIZE;
1953	rp->header.maxslot = n;
1954	stblsize = (n + 31) >> L2DTSLOTSIZE;
1955
1956	/* copy old stbl to new stbl at start of extended area */
1957	rp->header.stblindex = DTROOTMAXSLOT;
1958	stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
1959	memcpy(stbl, sp->header.stbl, sp->header.nextindex);
1960	rp->header.nextindex = sp->header.nextindex;
1961
1962	/* copy old data area to start of new data area */
1963	memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);
1964
1965	/*
1966	 * append free region of newly extended area at tail of freelist
1967	 */
1968	/* init free region of newly extended area */
1969	fsi = n = DTROOTMAXSLOT + stblsize;
1970	f = &rp->slot[fsi];
1971	for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
1972		f->next = fsi;
1973	f->next = -1;
1974
1975	/* append new free region at tail of old freelist */
1976	fsi = sp->header.freelist;
1977	if (fsi == -1)
1978		rp->header.freelist = n;
1979	else {
1980		rp->header.freelist = fsi;
1981
1982		do {
1983			f = &rp->slot[fsi];
1984			fsi = f->next;
1985		} while (fsi != -1);
1986
1987		f->next = n;
1988	}
1989
1990	rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n;
1991
1992	/*
1993	 * Update directory index table for entries now in right page
1994	 */
1995	if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
1996		s64 lblock;
1997		struct metapage *mp = NULL;
1998		struct ldtentry *ldtentry;
1999
2000		stbl = DT_GETSTBL(rp);
2001		for (n = 0; n < rp->header.nextindex; n++) {
2002			ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
2003			modify_index(tid, ip, le32_to_cpu(ldtentry->index),
2004				     rbn, n, &mp, &lblock);
2005		}
2006		if (mp)
2007			release_metapage(mp);
2008	}
2009	/*
2010	 * insert the new entry into the new right/child page
2011	 * (skip index in the new right page will not change)
2012	 */
2013	dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
2014
2015	/*
2016	 *	reset parent/root page
2017	 *
2018	 * set the 1st entry offset to 0, which force the left-most key
2019	 * at any level of the tree to be less than any search key.
2020	 *
2021	 * The btree comparison code guarantees that the left-most key on any
2022	 * level of the tree is never used, so it doesn't need to be filled in.
2023	 */
2024	BT_MARK_DIRTY(smp, ip);
2025	/*
2026	 * acquire a transaction lock on the root page (in-memory inode)
2027	 */
2028	tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
2029	dtlck = (struct dt_lock *) & tlck->lock;
2030
2031	/* linelock root */
2032	ASSERT(dtlck->index == 0);
2033	lv = & dtlck->lv[0];
2034	lv->offset = 0;
2035	lv->length = DTROOTMAXSLOT;
2036	dtlck->index++;
2037
2038	/* update page header of root */
2039	if (sp->header.flag & BT_LEAF) {
2040		sp->header.flag &= ~BT_LEAF;
2041		sp->header.flag |= BT_INTERNAL;
2042	}
2043
2044	/* init the first entry */
2045	s = (struct idtentry *) & sp->slot[DTENTRYSTART];
2046	ppxd = (pxd_t *) s;
2047	*ppxd = *pxd;
2048	s->next = -1;
2049	s->namlen = 0;
2050
2051	stbl = sp->header.stbl;
2052	stbl[0] = DTENTRYSTART;
2053	sp->header.nextindex = 1;
2054
2055	/* init freelist */
2056	fsi = DTENTRYSTART + 1;
2057	f = &sp->slot[fsi];
2058
2059	/* init free region of remaining area */
2060	for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2061		f->next = fsi;
2062	f->next = -1;
2063
2064	sp->header.freelist = DTENTRYSTART + 1;
2065	sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1);
2066
2067	*rmpp = rmp;
2068
2069	return 0;
2070}
2071
2072
2073/*
2074 *	dtDelete()
2075 *
2076 * function: delete the entry(s) referenced by a key.
2077 *
2078 * parameter:
2079 *
2080 * return:
2081 */
2082int dtDelete(tid_t tid,
2083	 struct inode *ip, struct component_name * key, ino_t * ino, int flag)
2084{
2085	int rc = 0;
2086	s64 bn;
2087	struct metapage *mp, *imp;
2088	dtpage_t *p;
2089	int index;
2090	struct btstack btstack;
2091	struct dt_lock *dtlck;
2092	struct tlock *tlck;
2093	struct lv *lv;
2094	int i;
2095	struct ldtentry *ldtentry;
2096	u8 *stbl;
2097	u32 table_index, next_index;
2098	struct metapage *nmp;
2099	dtpage_t *np;
2100
2101	/*
2102	 *	search for the entry to delete:
2103	 *
2104	 * dtSearch() returns (leaf page pinned, index at which to delete).
2105	 */
2106	if ((rc = dtSearch(ip, key, ino, &btstack, flag)))
2107		return rc;
2108
2109	/* retrieve search result */
2110	DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2111
2112	/*
2113	 * We need to find put the index of the next entry into the
2114	 * directory index table in order to resume a readdir from this
2115	 * entry.
2116	 */
2117	if (DO_INDEX(ip)) {
2118		stbl = DT_GETSTBL(p);
2119		ldtentry = (struct ldtentry *) & p->slot[stbl[index]];
2120		table_index = le32_to_cpu(ldtentry->index);
2121		if (index == (p->header.nextindex - 1)) {
2122			/*
2123			 * Last entry in this leaf page
2124			 */
2125			if ((p->header.flag & BT_ROOT)
2126			    || (p->header.next == 0))
2127				next_index = -1;
2128			else {
2129				/* Read next leaf page */
2130				DT_GETPAGE(ip, le64_to_cpu(p->header.next),
2131					   nmp, PSIZE, np, rc);
2132				if (rc)
2133					next_index = -1;
2134				else {
2135					stbl = DT_GETSTBL(np);
2136					ldtentry =
2137					    (struct ldtentry *) & np->
2138					    slot[stbl[0]];
2139					next_index =
2140					    le32_to_cpu(ldtentry->index);
2141					DT_PUTPAGE(nmp);
2142				}
2143			}
2144		} else {
2145			ldtentry =
2146			    (struct ldtentry *) & p->slot[stbl[index + 1]];
2147			next_index = le32_to_cpu(ldtentry->index);
2148		}
2149		free_index(tid, ip, table_index, next_index);
2150	}
2151	/*
2152	 * the leaf page becomes empty, delete the page
2153	 */
2154	if (p->header.nextindex == 1) {
2155		/* delete empty page */
2156		rc = dtDeleteUp(tid, ip, mp, p, &btstack);
2157	}
2158	/*
2159	 * the leaf page has other entries remaining:
2160	 *
2161	 * delete the entry from the leaf page.
2162	 */
2163	else {
2164		BT_MARK_DIRTY(mp, ip);
2165		/*
2166		 * acquire a transaction lock on the leaf page
2167		 */
2168		tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2169		dtlck = (struct dt_lock *) & tlck->lock;
2170
2171		/*
2172		 * Do not assume that dtlck->index will be zero.  During a
2173		 * rename within a directory, this transaction may have
2174		 * modified this page already when adding the new entry.
2175		 */
2176
2177		/* linelock header */
2178		if (dtlck->index >= dtlck->maxcnt)
2179			dtlck = (struct dt_lock *) txLinelock(dtlck);
2180		lv = & dtlck->lv[dtlck->index];
2181		lv->offset = 0;
2182		lv->length = 1;
2183		dtlck->index++;
2184
2185		/* linelock stbl of non-root leaf page */
2186		if (!(p->header.flag & BT_ROOT)) {
2187			if (dtlck->index >= dtlck->maxcnt)
2188				dtlck = (struct dt_lock *) txLinelock(dtlck);
2189			lv = & dtlck->lv[dtlck->index];
2190			i = index >> L2DTSLOTSIZE;
2191			lv->offset = p->header.stblindex + i;
2192			lv->length =
2193			    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2194			    i + 1;
2195			dtlck->index++;
2196		}
2197
2198		/* free the leaf entry */
2199		dtDeleteEntry(p, index, &dtlck);
2200
2201		/*
2202		 * Update directory index table for entries moved in stbl
2203		 */
2204		if (DO_INDEX(ip) && index < p->header.nextindex) {
2205			s64 lblock;
2206
2207			imp = NULL;
2208			stbl = DT_GETSTBL(p);
2209			for (i = index; i < p->header.nextindex; i++) {
2210				ldtentry =
2211				    (struct ldtentry *) & p->slot[stbl[i]];
2212				modify_index(tid, ip,
2213					     le32_to_cpu(ldtentry->index),
2214					     bn, i, &imp, &lblock);
2215			}
2216			if (imp)
2217				release_metapage(imp);
2218		}
2219
2220		DT_PUTPAGE(mp);
2221	}
2222
2223	return rc;
2224}
2225
2226
2227/*
2228 *	dtDeleteUp()
2229 *
2230 * function:
2231 *	free empty pages as propagating deletion up the tree
2232 *
2233 * parameter:
2234 *
2235 * return:
2236 */
2237static int dtDeleteUp(tid_t tid, struct inode *ip,
2238	   struct metapage * fmp, dtpage_t * fp, struct btstack * btstack)
2239{
2240	int rc = 0;
2241	struct metapage *mp;
2242	dtpage_t *p;
2243	int index, nextindex;
2244	int xlen;
2245	struct btframe *parent;
2246	struct dt_lock *dtlck;
2247	struct tlock *tlck;
2248	struct lv *lv;
2249	struct pxd_lock *pxdlock;
2250	int i;
2251
2252	/*
2253	 *	keep the root leaf page which has become empty
2254	 */
2255	if (BT_IS_ROOT(fmp)) {
2256		/*
2257		 * reset the root
2258		 *
2259		 * dtInitRoot() acquires txlock on the root
2260		 */
2261		dtInitRoot(tid, ip, PARENT(ip));
2262
2263		DT_PUTPAGE(fmp);
2264
2265		return 0;
2266	}
2267
2268	/*
2269	 *	free the non-root leaf page
2270	 */
2271	/*
2272	 * acquire a transaction lock on the page
2273	 *
2274	 * write FREEXTENT|NOREDOPAGE log record
2275	 * N.B. linelock is overlaid as freed extent descriptor, and
2276	 * the buffer page is freed;
2277	 */
2278	tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2279	pxdlock = (struct pxd_lock *) & tlck->lock;
2280	pxdlock->flag = mlckFREEPXD;
2281	pxdlock->pxd = fp->header.self;
2282	pxdlock->index = 1;
2283
2284	/* update sibling pointers */
2285	if ((rc = dtRelink(tid, ip, fp))) {
2286		BT_PUTPAGE(fmp);
2287		return rc;
2288	}
2289
2290	xlen = lengthPXD(&fp->header.self);
2291
2292	/* Free quota allocation. */
2293	dquot_free_block(ip, xlen);
2294
2295	/* free/invalidate its buffer page */
2296	discard_metapage(fmp);
2297
2298	/*
2299	 *	propagate page deletion up the directory tree
2300	 *
2301	 * If the delete from the parent page makes it empty,
2302	 * continue all the way up the tree.
2303	 * stop if the root page is reached (which is never deleted) or
2304	 * if the entry deletion does not empty the page.
2305	 */
2306	while ((parent = BT_POP(btstack)) != NULL) {
2307		/* pin the parent page <sp> */
2308		DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
2309		if (rc)
2310			return rc;
2311
2312		/*
2313		 * free the extent of the child page deleted
2314		 */
2315		index = parent->index;
2316
2317		/*
2318		 * delete the entry for the child page from parent
2319		 */
2320		nextindex = p->header.nextindex;
2321
2322		/*
2323		 * the parent has the single entry being deleted:
2324		 *
2325		 * free the parent page which has become empty.
2326		 */
2327		if (nextindex == 1) {
2328			/*
2329			 * keep the root internal page which has become empty
2330			 */
2331			if (p->header.flag & BT_ROOT) {
2332				/*
2333				 * reset the root
2334				 *
2335				 * dtInitRoot() acquires txlock on the root
2336				 */
2337				dtInitRoot(tid, ip, PARENT(ip));
2338
2339				DT_PUTPAGE(mp);
2340
2341				return 0;
2342			}
2343			/*
2344			 * free the parent page
2345			 */
2346			else {
2347				/*
2348				 * acquire a transaction lock on the page
2349				 *
2350				 * write FREEXTENT|NOREDOPAGE log record
2351				 */
2352				tlck =
2353				    txMaplock(tid, ip,
2354					      tlckDTREE | tlckFREE);
2355				pxdlock = (struct pxd_lock *) & tlck->lock;
2356				pxdlock->flag = mlckFREEPXD;
2357				pxdlock->pxd = p->header.self;
2358				pxdlock->index = 1;
2359
2360				/* update sibling pointers */
2361				if ((rc = dtRelink(tid, ip, p))) {
2362					DT_PUTPAGE(mp);
2363					return rc;
2364				}
2365
2366				xlen = lengthPXD(&p->header.self);
2367
2368				/* Free quota allocation */
2369				dquot_free_block(ip, xlen);
2370
2371				/* free/invalidate its buffer page */
2372				discard_metapage(mp);
2373
2374				/* propagate up */
2375				continue;
2376			}
2377		}
2378
2379		/*
2380		 * the parent has other entries remaining:
2381		 *
2382		 * delete the router entry from the parent page.
2383		 */
2384		BT_MARK_DIRTY(mp, ip);
2385		/*
2386		 * acquire a transaction lock on the page
2387		 *
2388		 * action: router entry deletion
2389		 */
2390		tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
2391		dtlck = (struct dt_lock *) & tlck->lock;
2392
2393		/* linelock header */
2394		if (dtlck->index >= dtlck->maxcnt)
2395			dtlck = (struct dt_lock *) txLinelock(dtlck);
2396		lv = & dtlck->lv[dtlck->index];
2397		lv->offset = 0;
2398		lv->length = 1;
2399		dtlck->index++;
2400
2401		/* linelock stbl of non-root leaf page */
2402		if (!(p->header.flag & BT_ROOT)) {
2403			if (dtlck->index < dtlck->maxcnt)
2404				lv++;
2405			else {
2406				dtlck = (struct dt_lock *) txLinelock(dtlck);
2407				lv = & dtlck->lv[0];
2408			}
2409			i = index >> L2DTSLOTSIZE;
2410			lv->offset = p->header.stblindex + i;
2411			lv->length =
2412			    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
2413			    i + 1;
2414			dtlck->index++;
2415		}
2416
2417		/* free the router entry */
2418		dtDeleteEntry(p, index, &dtlck);
2419
2420		/* reset key of new leftmost entry of level (for consistency) */
2421		if (index == 0 &&
2422		    ((p->header.flag & BT_ROOT) || p->header.prev == 0))
2423			dtTruncateEntry(p, 0, &dtlck);
2424
2425		/* unpin the parent page */
2426		DT_PUTPAGE(mp);
2427
2428		/* exit propagation up */
2429		break;
2430	}
2431
2432	if (!DO_INDEX(ip))
2433		ip->i_size -= PSIZE;
2434
2435	return 0;
2436}
2437
2438#ifdef _NOTYET
2439/*
2440 * NAME:	dtRelocate()
2441 *
2442 * FUNCTION:	relocate dtpage (internal or leaf) of directory;
2443 *		This function is mainly used by defragfs utility.
2444 */
2445int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2446	       s64 nxaddr)
2447{
2448	int rc = 0;
2449	struct metapage *mp, *pmp, *lmp, *rmp;
2450	dtpage_t *p, *pp, *rp = 0, *lp= 0;
2451	s64 bn;
2452	int index;
2453	struct btstack btstack;
2454	pxd_t *pxd;
2455	s64 oxaddr, nextbn, prevbn;
2456	int xlen, xsize;
2457	struct tlock *tlck;
2458	struct dt_lock *dtlck;
2459	struct pxd_lock *pxdlock;
2460	s8 *stbl;
2461	struct lv *lv;
2462
2463	oxaddr = addressPXD(opxd);
2464	xlen = lengthPXD(opxd);
2465
2466	jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d",
2467		   (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr,
2468		   xlen);
2469
2470	/*
2471	 *	1. get the internal parent dtpage covering
2472	 *	router entry for the tartget page to be relocated;
2473	 */
2474	rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
2475	if (rc)
2476		return rc;
2477
2478	/* retrieve search result */
2479	DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2480	jfs_info("dtRelocate: parent router entry validated.");
2481
2482	/*
2483	 *	2. relocate the target dtpage
2484	 */
2485	/* read in the target page from src extent */
2486	DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
2487	if (rc) {
2488		/* release the pinned parent page */
2489		DT_PUTPAGE(pmp);
2490		return rc;
2491	}
2492
2493	/*
2494	 * read in sibling pages if any to update sibling pointers;
2495	 */
2496	rmp = NULL;
2497	if (p->header.next) {
2498		nextbn = le64_to_cpu(p->header.next);
2499		DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
2500		if (rc) {
2501			DT_PUTPAGE(mp);
2502			DT_PUTPAGE(pmp);
2503			return (rc);
2504		}
2505	}
2506
2507	lmp = NULL;
2508	if (p->header.prev) {
2509		prevbn = le64_to_cpu(p->header.prev);
2510		DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
2511		if (rc) {
2512			DT_PUTPAGE(mp);
2513			DT_PUTPAGE(pmp);
2514			if (rmp)
2515				DT_PUTPAGE(rmp);
2516			return (rc);
2517		}
2518	}
2519
2520	/* at this point, all xtpages to be updated are in memory */
2521
2522	/*
2523	 * update sibling pointers of sibling dtpages if any;
2524	 */
2525	if (lmp) {
2526		tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK);
2527		dtlck = (struct dt_lock *) & tlck->lock;
2528		/* linelock header */
2529		ASSERT(dtlck->index == 0);
2530		lv = & dtlck->lv[0];
2531		lv->offset = 0;
2532		lv->length = 1;
2533		dtlck->index++;
2534
2535		lp->header.next = cpu_to_le64(nxaddr);
2536		DT_PUTPAGE(lmp);
2537	}
2538
2539	if (rmp) {
2540		tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK);
2541		dtlck = (struct dt_lock *) & tlck->lock;
2542		/* linelock header */
2543		ASSERT(dtlck->index == 0);
2544		lv = & dtlck->lv[0];
2545		lv->offset = 0;
2546		lv->length = 1;
2547		dtlck->index++;
2548
2549		rp->header.prev = cpu_to_le64(nxaddr);
2550		DT_PUTPAGE(rmp);
2551	}
2552
2553	/*
2554	 * update the target dtpage to be relocated
2555	 *
2556	 * write LOG_REDOPAGE of LOG_NEW type for dst page
2557	 * for the whole target page (logredo() will apply
2558	 * after image and update bmap for allocation of the
2559	 * dst extent), and update bmap for allocation of
2560	 * the dst extent;
2561	 */
2562	tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW);
2563	dtlck = (struct dt_lock *) & tlck->lock;
2564	/* linelock header */
2565	ASSERT(dtlck->index == 0);
2566	lv = & dtlck->lv[0];
2567
2568	/* update the self address in the dtpage header */
2569	pxd = &p->header.self;
2570	PXDaddress(pxd, nxaddr);
2571
2572	/* the dst page is the same as the src page, i.e.,
2573	 * linelock for afterimage of the whole page;
2574	 */
2575	lv->offset = 0;
2576	lv->length = p->header.maxslot;
2577	dtlck->index++;
2578
2579	/* update the buffer extent descriptor of the dtpage */
2580	xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2581
2582	/* unpin the relocated page */
2583	DT_PUTPAGE(mp);
2584	jfs_info("dtRelocate: target dtpage relocated.");
2585
2586	/* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec
2587	 * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec
2588	 * will also force a bmap update ).
2589	 */
2590
2591	/*
2592	 *	3. acquire maplock for the source extent to be freed;
2593	 */
2594	/* for dtpage relocation, write a LOG_NOREDOPAGE record
2595	 * for the source dtpage (logredo() will init NoRedoPage
2596	 * filter and will also update bmap for free of the source
2597	 * dtpage), and upadte bmap for free of the source dtpage;
2598	 */
2599	tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
2600	pxdlock = (struct pxd_lock *) & tlck->lock;
2601	pxdlock->flag = mlckFREEPXD;
2602	PXDaddress(&pxdlock->pxd, oxaddr);
2603	PXDlength(&pxdlock->pxd, xlen);
2604	pxdlock->index = 1;
2605
2606	/*
2607	 *	4. update the parent router entry for relocation;
2608	 *
2609	 * acquire tlck for the parent entry covering the target dtpage;
2610	 * write LOG_REDOPAGE to apply after image only;
2611	 */
2612	jfs_info("dtRelocate: update parent router entry.");
2613	tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
2614	dtlck = (struct dt_lock *) & tlck->lock;
2615	lv = & dtlck->lv[dtlck->index];
2616
2617	/* update the PXD with the new address */
2618	stbl = DT_GETSTBL(pp);
2619	pxd = (pxd_t *) & pp->slot[stbl[index]];
2620	PXDaddress(pxd, nxaddr);
2621	lv->offset = stbl[index];
2622	lv->length = 1;
2623	dtlck->index++;
2624
2625	/* unpin the parent dtpage */
2626	DT_PUTPAGE(pmp);
2627
2628	return rc;
2629}
2630
2631/*
2632 * NAME:	dtSearchNode()
2633 *
2634 * FUNCTION:	Search for an dtpage containing a specified address
2635 *		This function is mainly used by defragfs utility.
2636 *
2637 * NOTE:	Search result on stack, the found page is pinned at exit.
2638 *		The result page must be an internal dtpage.
2639 *		lmxaddr give the address of the left most page of the
2640 *		dtree level, in which the required dtpage resides.
2641 */
2642static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
2643			struct btstack * btstack)
2644{
2645	int rc = 0;
2646	s64 bn;
2647	struct metapage *mp;
2648	dtpage_t *p;
2649	int psize = 288;	/* initial in-line directory */
2650	s8 *stbl;
2651	int i;
2652	pxd_t *pxd;
2653	struct btframe *btsp;
2654
2655	BT_CLR(btstack);	/* reset stack */
2656
2657	/*
2658	 *	descend tree to the level with specified leftmost page
2659	 *
2660	 *  by convention, root bn = 0.
2661	 */
2662	for (bn = 0;;) {
2663		/* get/pin the page to search */
2664		DT_GETPAGE(ip, bn, mp, psize, p, rc);
2665		if (rc)
2666			return rc;
2667
2668		/* does the xaddr of leftmost page of the levevl
2669		 * matches levevl search key ?
2670		 */
2671		if (p->header.flag & BT_ROOT) {
2672			if (lmxaddr == 0)
2673				break;
2674		} else if (addressPXD(&p->header.self) == lmxaddr)
2675			break;
2676
2677		/*
2678		 * descend down to leftmost child page
2679		 */
2680		if (p->header.flag & BT_LEAF) {
2681			DT_PUTPAGE(mp);
2682			return -ESTALE;
2683		}
2684
2685		/* get the leftmost entry */
2686		stbl = DT_GETSTBL(p);
2687		pxd = (pxd_t *) & p->slot[stbl[0]];
2688
2689		/* get the child page block address */
2690		bn = addressPXD(pxd);
2691		psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
2692		/* unpin the parent page */
2693		DT_PUTPAGE(mp);
2694	}
2695
2696	/*
2697	 *	search each page at the current levevl
2698	 */
2699      loop:
2700	stbl = DT_GETSTBL(p);
2701	for (i = 0; i < p->header.nextindex; i++) {
2702		pxd = (pxd_t *) & p->slot[stbl[i]];
2703
2704		/* found the specified router entry */
2705		if (addressPXD(pxd) == addressPXD(kpxd) &&
2706		    lengthPXD(pxd) == lengthPXD(kpxd)) {
2707			btsp = btstack->top;
2708			btsp->bn = bn;
2709			btsp->index = i;
2710			btsp->mp = mp;
2711
2712			return 0;
2713		}
2714	}
2715
2716	/* get the right sibling page if any */
2717	if (p->header.next)
2718		bn = le64_to_cpu(p->header.next);
2719	else {
2720		DT_PUTPAGE(mp);
2721		return -ESTALE;
2722	}
2723
2724	/* unpin current page */
2725	DT_PUTPAGE(mp);
2726
2727	/* get the right sibling page */
2728	DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
2729	if (rc)
2730		return rc;
2731
2732	goto loop;
2733}
2734#endif /* _NOTYET */
2735
2736/*
2737 *	dtRelink()
2738 *
2739 * function:
2740 *	link around a freed page.
2741 *
2742 * parameter:
2743 *	fp:	page to be freed
2744 *
2745 * return:
2746 */
2747static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p)
2748{
2749	int rc;
2750	struct metapage *mp;
2751	s64 nextbn, prevbn;
2752	struct tlock *tlck;
2753	struct dt_lock *dtlck;
2754	struct lv *lv;
2755
2756	nextbn = le64_to_cpu(p->header.next);
2757	prevbn = le64_to_cpu(p->header.prev);
2758
2759	/* update prev pointer of the next page */
2760	if (nextbn != 0) {
2761		DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
2762		if (rc)
2763			return rc;
2764
2765		BT_MARK_DIRTY(mp, ip);
2766		/*
2767		 * acquire a transaction lock on the next page
2768		 *
2769		 * action: update prev pointer;
2770		 */
2771		tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2772		jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2773			tlck, ip, mp);
2774		dtlck = (struct dt_lock *) & tlck->lock;
2775
2776		/* linelock header */
2777		if (dtlck->index >= dtlck->maxcnt)
2778			dtlck = (struct dt_lock *) txLinelock(dtlck);
2779		lv = & dtlck->lv[dtlck->index];
2780		lv->offset = 0;
2781		lv->length = 1;
2782		dtlck->index++;
2783
2784		p->header.prev = cpu_to_le64(prevbn);
2785		DT_PUTPAGE(mp);
2786	}
2787
2788	/* update next pointer of the previous page */
2789	if (prevbn != 0) {
2790		DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
2791		if (rc)
2792			return rc;
2793
2794		BT_MARK_DIRTY(mp, ip);
2795		/*
2796		 * acquire a transaction lock on the prev page
2797		 *
2798		 * action: update next pointer;
2799		 */
2800		tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2801		jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2802			tlck, ip, mp);
2803		dtlck = (struct dt_lock *) & tlck->lock;
2804
2805		/* linelock header */
2806		if (dtlck->index >= dtlck->maxcnt)
2807			dtlck = (struct dt_lock *) txLinelock(dtlck);
2808		lv = & dtlck->lv[dtlck->index];
2809		lv->offset = 0;
2810		lv->length = 1;
2811		dtlck->index++;
2812
2813		p->header.next = cpu_to_le64(nextbn);
2814		DT_PUTPAGE(mp);
2815	}
2816
2817	return 0;
2818}
2819
2820
2821/*
2822 *	dtInitRoot()
2823 *
2824 * initialize directory root (inline in inode)
2825 */
2826void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
2827{
2828	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2829	dtroot_t *p;
2830	int fsi;
2831	struct dtslot *f;
2832	struct tlock *tlck;
2833	struct dt_lock *dtlck;
2834	struct lv *lv;
2835	u16 xflag_save;
2836
2837	/*
2838	 * If this was previously an non-empty directory, we need to remove
2839	 * the old directory table.
2840	 */
2841	if (DO_INDEX(ip)) {
2842		if (!jfs_dirtable_inline(ip)) {
2843			struct tblock *tblk = tid_to_tblock(tid);
2844			/*
2845			 * We're playing games with the tid's xflag.  If
2846			 * we're removing a regular file, the file's xtree
2847			 * is committed with COMMIT_PMAP, but we always
2848			 * commit the directories xtree with COMMIT_PWMAP.
2849			 */
2850			xflag_save = tblk->xflag;
2851			tblk->xflag = 0;
2852			/*
2853			 * xtTruncate isn't guaranteed to fully truncate
2854			 * the xtree.  The caller needs to check i_size
2855			 * after committing the transaction to see if
2856			 * additional truncation is needed.  The
2857			 * COMMIT_Stale flag tells caller that we
2858			 * initiated the truncation.
2859			 */
2860			xtTruncate(tid, ip, 0, COMMIT_PWMAP);
2861			set_cflag(COMMIT_Stale, ip);
2862
2863			tblk->xflag = xflag_save;
2864		} else
2865			ip->i_size = 1;
2866
2867		jfs_ip->next_index = 2;
2868	} else
2869		ip->i_size = IDATASIZE;
2870
2871	/*
2872	 * acquire a transaction lock on the root
2873	 *
2874	 * action: directory initialization;
2875	 */
2876	tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag,
2877		      tlckDTREE | tlckENTRY | tlckBTROOT);
2878	dtlck = (struct dt_lock *) & tlck->lock;
2879
2880	/* linelock root */
2881	ASSERT(dtlck->index == 0);
2882	lv = & dtlck->lv[0];
2883	lv->offset = 0;
2884	lv->length = DTROOTMAXSLOT;
2885	dtlck->index++;
2886
2887	p = &jfs_ip->i_dtroot;
2888
2889	p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
2890
2891	p->header.nextindex = 0;
2892
2893	/* init freelist */
2894	fsi = 1;
2895	f = &p->slot[fsi];
2896
2897	/* init data area of root */
2898	for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
2899		f->next = fsi;
2900	f->next = -1;
2901
2902	p->header.freelist = 1;
2903	p->header.freecnt = 8;
2904
2905	/* init '..' entry */
2906	p->header.idotdot = cpu_to_le32(idotdot);
2907
2908	return;
2909}
2910
2911/*
2912 *	add_missing_indices()
2913 *
2914 * function: Fix dtree page in which one or more entries has an invalid index.
2915 *	     fsck.jfs should really fix this, but it currently does not.
2916 *	     Called from jfs_readdir when bad index is detected.
2917 */
2918static void add_missing_indices(struct inode *inode, s64 bn)
2919{
2920	struct ldtentry *d;
2921	struct dt_lock *dtlck;
2922	int i;
2923	uint index;
2924	struct lv *lv;
2925	struct metapage *mp;
2926	dtpage_t *p;
2927	int rc;
2928	s8 *stbl;
2929	tid_t tid;
2930	struct tlock *tlck;
2931
2932	tid = txBegin(inode->i_sb, 0);
2933
2934	DT_GETPAGE(inode, bn, mp, PSIZE, p, rc);
2935
2936	if (rc) {
2937		printk(KERN_ERR "DT_GETPAGE failed!\n");
2938		goto end;
2939	}
2940	BT_MARK_DIRTY(mp, inode);
2941
2942	ASSERT(p->header.flag & BT_LEAF);
2943
2944	tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY);
2945	if (BT_IS_ROOT(mp))
2946		tlck->type |= tlckBTROOT;
2947
2948	dtlck = (struct dt_lock *) &tlck->lock;
2949
2950	stbl = DT_GETSTBL(p);
2951	for (i = 0; i < p->header.nextindex; i++) {
2952		d = (struct ldtentry *) &p->slot[stbl[i]];
2953		index = le32_to_cpu(d->index);
2954		if ((index < 2) || (index >= JFS_IP(inode)->next_index)) {
2955			d->index = cpu_to_le32(add_index(tid, inode, bn, i));
2956			if (dtlck->index >= dtlck->maxcnt)
2957				dtlck = (struct dt_lock *) txLinelock(dtlck);
2958			lv = &dtlck->lv[dtlck->index];
2959			lv->offset = stbl[i];
2960			lv->length = 1;
2961			dtlck->index++;
2962		}
2963	}
2964
2965	DT_PUTPAGE(mp);
2966	(void) txCommit(tid, 1, &inode, 0);
2967end:
2968	txEnd(tid);
2969}
2970
2971/*
2972 * Buffer to hold directory entry info while traversing a dtree page
2973 * before being fed to the filldir function
2974 */
2975struct jfs_dirent {
2976	loff_t position;
2977	int ino;
2978	u16 name_len;
2979	char name[0];
2980};
2981
2982/*
2983 * function to determine next variable-sized jfs_dirent in buffer
2984 */
2985static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
2986{
2987	return (struct jfs_dirent *)
2988		((char *)dirent +
2989		 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 +
2990		   sizeof (loff_t) - 1) &
2991		  ~(sizeof (loff_t) - 1)));
2992}
2993
2994/*
2995 *	jfs_readdir()
2996 *
2997 * function: read directory entries sequentially
2998 *	from the specified entry offset
2999 *
3000 * parameter:
3001 *
3002 * return: offset = (pn, index) of start entry
3003 *	of next jfs_readdir()/dtRead()
3004 */
3005int jfs_readdir(struct file *file, struct dir_context *ctx)
3006{
3007	struct inode *ip = file_inode(file);
3008	struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
3009	int rc = 0;
3010	loff_t dtpos;	/* legacy OS/2 style position */
3011	struct dtoffset {
3012		s16 pn;
3013		s16 index;
3014		s32 unused;
3015	} *dtoffset = (struct dtoffset *) &dtpos;
3016	s64 bn;
3017	struct metapage *mp;
3018	dtpage_t *p;
3019	int index;
3020	s8 *stbl;
3021	struct btstack btstack;
3022	int i, next;
3023	struct ldtentry *d;
3024	struct dtslot *t;
3025	int d_namleft, len, outlen;
3026	unsigned long dirent_buf;
3027	char *name_ptr;
3028	u32 dir_index;
3029	int do_index = 0;
3030	uint loop_count = 0;
3031	struct jfs_dirent *jfs_dirent;
3032	int jfs_dirents;
3033	int overflow, fix_page, page_fixed = 0;
3034	static int unique_pos = 2;	/* If we can't fix broken index */
3035
3036	if (ctx->pos == DIREND)
3037		return 0;
3038
3039	if (DO_INDEX(ip)) {
3040		/*
3041		 * persistent index is stored in directory entries.
3042		 * Special cases:	 0 = .
3043		 *			 1 = ..
3044		 *			-1 = End of directory
3045		 */
3046		do_index = 1;
3047
3048		dir_index = (u32) ctx->pos;
3049
3050		/*
3051		 * NFSv4 reserves cookies 1 and 2 for . and .. so the value
3052		 * we return to the vfs is one greater than the one we use
3053		 * internally.
3054		 */
3055		if (dir_index)
3056			dir_index--;
3057
3058		if (dir_index > 1) {
3059			struct dir_table_slot dirtab_slot;
3060
3061			if (dtEmpty(ip) ||
3062			    (dir_index >= JFS_IP(ip)->next_index)) {
3063				/* Stale position.  Directory has shrunk */
3064				ctx->pos = DIREND;
3065				return 0;
3066			}
3067		      repeat:
3068			rc = read_index(ip, dir_index, &dirtab_slot);
3069			if (rc) {
3070				ctx->pos = DIREND;
3071				return rc;
3072			}
3073			if (dirtab_slot.flag == DIR_INDEX_FREE) {
3074				if (loop_count++ > JFS_IP(ip)->next_index) {
3075					jfs_err("jfs_readdir detected "
3076						   "infinite loop!");
3077					ctx->pos = DIREND;
3078					return 0;
3079				}
3080				dir_index = le32_to_cpu(dirtab_slot.addr2);
3081				if (dir_index == -1) {
3082					ctx->pos = DIREND;
3083					return 0;
3084				}
3085				goto repeat;
3086			}
3087			bn = addressDTS(&dirtab_slot);
3088			index = dirtab_slot.slot;
3089			DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3090			if (rc) {
3091				ctx->pos = DIREND;
3092				return 0;
3093			}
3094			if (p->header.flag & BT_INTERNAL) {
3095				jfs_err("jfs_readdir: bad index table");
3096				DT_PUTPAGE(mp);
3097				ctx->pos = DIREND;
3098				return 0;
3099			}
3100		} else {
3101			if (dir_index == 0) {
3102				/*
3103				 * self "."
3104				 */
3105				ctx->pos = 1;
3106				if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR))
3107					return 0;
3108			}
3109			/*
3110			 * parent ".."
3111			 */
3112			ctx->pos = 2;
3113			if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR))
3114				return 0;
3115
3116			/*
3117			 * Find first entry of left-most leaf
3118			 */
3119			if (dtEmpty(ip)) {
3120				ctx->pos = DIREND;
3121				return 0;
3122			}
3123
3124			if ((rc = dtReadFirst(ip, &btstack)))
3125				return rc;
3126
3127			DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3128		}
3129	} else {
3130		/*
3131		 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
3132		 *
3133		 * pn = 0; index = 1:	First entry "."
3134		 * pn = 0; index = 2:	Second entry ".."
3135		 * pn > 0:		Real entries, pn=1 -> leftmost page
3136		 * pn = index = -1:	No more entries
3137		 */
3138		dtpos = ctx->pos;
3139		if (dtpos < 2) {
3140			/* build "." entry */
3141			ctx->pos = 1;
3142			if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR))
3143				return 0;
3144			dtoffset->index = 2;
3145			ctx->pos = dtpos;
3146		}
3147
3148		if (dtoffset->pn == 0) {
3149			if (dtoffset->index == 2) {
3150				/* build ".." entry */
3151				if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR))
3152					return 0;
3153			} else {
3154				jfs_err("jfs_readdir called with "
3155					"invalid offset!");
3156			}
3157			dtoffset->pn = 1;
3158			dtoffset->index = 0;
3159			ctx->pos = dtpos;
3160		}
3161
3162		if (dtEmpty(ip)) {
3163			ctx->pos = DIREND;
3164			return 0;
3165		}
3166
3167		if ((rc = dtReadNext(ip, &ctx->pos, &btstack))) {
3168			jfs_err("jfs_readdir: unexpected rc = %d "
3169				"from dtReadNext", rc);
3170			ctx->pos = DIREND;
3171			return 0;
3172		}
3173		/* get start leaf page and index */
3174		DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
3175
3176		/* offset beyond directory eof ? */
3177		if (bn < 0) {
3178			ctx->pos = DIREND;
3179			return 0;
3180		}
3181	}
3182
3183	dirent_buf = __get_free_page(GFP_KERNEL);
3184	if (dirent_buf == 0) {
3185		DT_PUTPAGE(mp);
3186		jfs_warn("jfs_readdir: __get_free_page failed!");
3187		ctx->pos = DIREND;
3188		return -ENOMEM;
3189	}
3190
3191	while (1) {
3192		jfs_dirent = (struct jfs_dirent *) dirent_buf;
3193		jfs_dirents = 0;
3194		overflow = fix_page = 0;
3195
3196		stbl = DT_GETSTBL(p);
3197
3198		for (i = index; i < p->header.nextindex; i++) {
3199			d = (struct ldtentry *) & p->slot[stbl[i]];
3200
3201			if (((long) jfs_dirent + d->namlen + 1) >
3202			    (dirent_buf + PAGE_SIZE)) {
3203				/* DBCS codepages could overrun dirent_buf */
3204				index = i;
3205				overflow = 1;
3206				break;
3207			}
3208
3209			d_namleft = d->namlen;
3210			name_ptr = jfs_dirent->name;
3211			jfs_dirent->ino = le32_to_cpu(d->inumber);
3212
3213			if (do_index) {
3214				len = min(d_namleft, DTLHDRDATALEN);
3215				jfs_dirent->position = le32_to_cpu(d->index);
3216				/*
3217				 * d->index should always be valid, but it
3218				 * isn't.  fsck.jfs doesn't create the
3219				 * directory index for the lost+found
3220				 * directory.  Rather than let it go,
3221				 * we can try to fix it.
3222				 */
3223				if ((jfs_dirent->position < 2) ||
3224				    (jfs_dirent->position >=
3225				     JFS_IP(ip)->next_index)) {
3226					if (!page_fixed && !isReadOnly(ip)) {
3227						fix_page = 1;
3228						/*
3229						 * setting overflow and setting
3230						 * index to i will cause the
3231						 * same page to be processed
3232						 * again starting here
3233						 */
3234						overflow = 1;
3235						index = i;
3236						break;
3237					}
3238					jfs_dirent->position = unique_pos++;
3239				}
3240				/*
3241				 * We add 1 to the index because we may
3242				 * use a value of 2 internally, and NFSv4
3243				 * doesn't like that.
3244				 */
3245				jfs_dirent->position++;
3246			} else {
3247				jfs_dirent->position = dtpos;
3248				len = min(d_namleft, DTLHDRDATALEN_LEGACY);
3249			}
3250
3251			/* copy the name of head/only segment */
3252			outlen = jfs_strfromUCS_le(name_ptr, d->name, len,
3253						   codepage);
3254			jfs_dirent->name_len = outlen;
3255
3256			/* copy name in the additional segment(s) */
3257			next = d->next;
3258			while (next >= 0) {
3259				t = (struct dtslot *) & p->slot[next];
3260				name_ptr += outlen;
3261				d_namleft -= len;
3262				/* Sanity Check */
3263				if (d_namleft == 0) {
3264					jfs_error(ip->i_sb,
3265						  "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n",
3266						  (long)ip->i_ino,
3267						  (long long)bn,
3268						  i);
3269					goto skip_one;
3270				}
3271				len = min(d_namleft, DTSLOTDATALEN);
3272				outlen = jfs_strfromUCS_le(name_ptr, t->name,
3273							   len, codepage);
3274				jfs_dirent->name_len += outlen;
3275
3276				next = t->next;
3277			}
3278
3279			jfs_dirents++;
3280			jfs_dirent = next_jfs_dirent(jfs_dirent);
3281skip_one:
3282			if (!do_index)
3283				dtoffset->index++;
3284		}
3285
3286		if (!overflow) {
3287			/* Point to next leaf page */
3288			if (p->header.flag & BT_ROOT)
3289				bn = 0;
3290			else {
3291				bn = le64_to_cpu(p->header.next);
3292				index = 0;
3293				/* update offset (pn:index) for new page */
3294				if (!do_index) {
3295					dtoffset->pn++;
3296					dtoffset->index = 0;
3297				}
3298			}
3299			page_fixed = 0;
3300		}
3301
3302		/* unpin previous leaf page */
3303		DT_PUTPAGE(mp);
3304
3305		jfs_dirent = (struct jfs_dirent *) dirent_buf;
3306		while (jfs_dirents--) {
3307			ctx->pos = jfs_dirent->position;
3308			if (!dir_emit(ctx, jfs_dirent->name,
3309				    jfs_dirent->name_len,
3310				    jfs_dirent->ino, DT_UNKNOWN))
3311				goto out;
3312			jfs_dirent = next_jfs_dirent(jfs_dirent);
3313		}
3314
3315		if (fix_page) {
3316			add_missing_indices(ip, bn);
3317			page_fixed = 1;
3318		}
3319
3320		if (!overflow && (bn == 0)) {
3321			ctx->pos = DIREND;
3322			break;
3323		}
3324
3325		DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3326		if (rc) {
3327			free_page(dirent_buf);
3328			return rc;
3329		}
3330	}
3331
3332      out:
3333	free_page(dirent_buf);
3334
3335	return rc;
3336}
3337
3338
3339/*
3340 *	dtReadFirst()
3341 *
3342 * function: get the leftmost page of the directory
3343 */
3344static int dtReadFirst(struct inode *ip, struct btstack * btstack)
3345{
3346	int rc = 0;
3347	s64 bn;
3348	int psize = 288;	/* initial in-line directory */
3349	struct metapage *mp;
3350	dtpage_t *p;
3351	s8 *stbl;
3352	struct btframe *btsp;
3353	pxd_t *xd;
3354
3355	BT_CLR(btstack);	/* reset stack */
3356
3357	/*
3358	 *	descend leftmost path of the tree
3359	 *
3360	 * by convention, root bn = 0.
3361	 */
3362	for (bn = 0;;) {
3363		DT_GETPAGE(ip, bn, mp, psize, p, rc);
3364		if (rc)
3365			return rc;
3366
3367		/*
3368		 * leftmost leaf page
3369		 */
3370		if (p->header.flag & BT_LEAF) {
3371			/* return leftmost entry */
3372			btsp = btstack->top;
3373			btsp->bn = bn;
3374			btsp->index = 0;
3375			btsp->mp = mp;
3376
3377			return 0;
3378		}
3379
3380		/*
3381		 * descend down to leftmost child page
3382		 */
3383		if (BT_STACK_FULL(btstack)) {
3384			DT_PUTPAGE(mp);
3385			jfs_error(ip->i_sb, "btstack overrun\n");
3386			BT_STACK_DUMP(btstack);
3387			return -EIO;
3388		}
3389		/* push (bn, index) of the parent page/entry */
3390		BT_PUSH(btstack, bn, 0);
3391
3392		/* get the leftmost entry */
3393		stbl = DT_GETSTBL(p);
3394		xd = (pxd_t *) & p->slot[stbl[0]];
3395
3396		/* get the child page block address */
3397		bn = addressPXD(xd);
3398		psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize;
3399
3400		/* unpin the parent page */
3401		DT_PUTPAGE(mp);
3402	}
3403}
3404
3405
3406/*
3407 *	dtReadNext()
3408 *
3409 * function: get the page of the specified offset (pn:index)
3410 *
3411 * return: if (offset > eof), bn = -1;
3412 *
3413 * note: if index > nextindex of the target leaf page,
3414 * start with 1st entry of next leaf page;
3415 */
3416static int dtReadNext(struct inode *ip, loff_t * offset,
3417		      struct btstack * btstack)
3418{
3419	int rc = 0;
3420	struct dtoffset {
3421		s16 pn;
3422		s16 index;
3423		s32 unused;
3424	} *dtoffset = (struct dtoffset *) offset;
3425	s64 bn;
3426	struct metapage *mp;
3427	dtpage_t *p;
3428	int index;
3429	int pn;
3430	s8 *stbl;
3431	struct btframe *btsp, *parent;
3432	pxd_t *xd;
3433
3434	/*
3435	 * get leftmost leaf page pinned
3436	 */
3437	if ((rc = dtReadFirst(ip, btstack)))
3438		return rc;
3439
3440	/* get leaf page */
3441	DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
3442
3443	/* get the start offset (pn:index) */
3444	pn = dtoffset->pn - 1;	/* Now pn = 0 represents leftmost leaf */
3445	index = dtoffset->index;
3446
3447	/* start at leftmost page ? */
3448	if (pn == 0) {
3449		/* offset beyond eof ? */
3450		if (index < p->header.nextindex)
3451			goto out;
3452
3453		if (p->header.flag & BT_ROOT) {
3454			bn = -1;
3455			goto out;
3456		}
3457
3458		/* start with 1st entry of next leaf page */
3459		dtoffset->pn++;
3460		dtoffset->index = index = 0;
3461		goto a;
3462	}
3463
3464	/* start at non-leftmost page: scan parent pages for large pn */
3465	if (p->header.flag & BT_ROOT) {
3466		bn = -1;
3467		goto out;
3468	}
3469
3470	/* start after next leaf page ? */
3471	if (pn > 1)
3472		goto b;
3473
3474	/* get leaf page pn = 1 */
3475      a:
3476	bn = le64_to_cpu(p->header.next);
3477
3478	/* unpin leaf page */
3479	DT_PUTPAGE(mp);
3480
3481	/* offset beyond eof ? */
3482	if (bn == 0) {
3483		bn = -1;
3484		goto out;
3485	}
3486
3487	goto c;
3488
3489	/*
3490	 * scan last internal page level to get target leaf page
3491	 */
3492      b:
3493	/* unpin leftmost leaf page */
3494	DT_PUTPAGE(mp);
3495
3496	/* get left most parent page */
3497	btsp = btstack->top;
3498	parent = btsp - 1;
3499	bn = parent->bn;
3500	DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3501	if (rc)
3502		return rc;
3503
3504	/* scan parent pages at last internal page level */
3505	while (pn >= p->header.nextindex) {
3506		pn -= p->header.nextindex;
3507
3508		/* get next parent page address */
3509		bn = le64_to_cpu(p->header.next);
3510
3511		/* unpin current parent page */
3512		DT_PUTPAGE(mp);
3513
3514		/* offset beyond eof ? */
3515		if (bn == 0) {
3516			bn = -1;
3517			goto out;
3518		}
3519
3520		/* get next parent page */
3521		DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3522		if (rc)
3523			return rc;
3524
3525		/* update parent page stack frame */
3526		parent->bn = bn;
3527	}
3528
3529	/* get leaf page address */
3530	stbl = DT_GETSTBL(p);
3531	xd = (pxd_t *) & p->slot[stbl[pn]];
3532	bn = addressPXD(xd);
3533
3534	/* unpin parent page */
3535	DT_PUTPAGE(mp);
3536
3537	/*
3538	 * get target leaf page
3539	 */
3540      c:
3541	DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3542	if (rc)
3543		return rc;
3544
3545	/*
3546	 * leaf page has been completed:
3547	 * start with 1st entry of next leaf page
3548	 */
3549	if (index >= p->header.nextindex) {
3550		bn = le64_to_cpu(p->header.next);
3551
3552		/* unpin leaf page */
3553		DT_PUTPAGE(mp);
3554
3555		/* offset beyond eof ? */
3556		if (bn == 0) {
3557			bn = -1;
3558			goto out;
3559		}
3560
3561		/* get next leaf page */
3562		DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
3563		if (rc)
3564			return rc;
3565
3566		/* start with 1st entry of next leaf page */
3567		dtoffset->pn++;
3568		dtoffset->index = 0;
3569	}
3570
3571      out:
3572	/* return target leaf page pinned */
3573	btsp = btstack->top;
3574	btsp->bn = bn;
3575	btsp->index = dtoffset->index;
3576	btsp->mp = mp;
3577
3578	return 0;
3579}
3580
3581
3582/*
3583 *	dtCompare()
3584 *
3585 * function: compare search key with an internal entry
3586 *
3587 * return:
3588 *	< 0 if k is < record
3589 *	= 0 if k is = record
3590 *	> 0 if k is > record
3591 */
3592static int dtCompare(struct component_name * key,	/* search key */
3593		     dtpage_t * p,	/* directory page */
3594		     int si)
3595{				/* entry slot index */
3596	wchar_t *kname;
3597	__le16 *name;
3598	int klen, namlen, len, rc;
3599	struct idtentry *ih;
3600	struct dtslot *t;
3601
3602	/*
3603	 * force the left-most key on internal pages, at any level of
3604	 * the tree, to be less than any search key.
3605	 * this obviates having to update the leftmost key on an internal
3606	 * page when the user inserts a new key in the tree smaller than
3607	 * anything that has been stored.
3608	 *
3609	 * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3610	 * at any internal page at any level of the tree,
3611	 * it descends to child of the entry anyway -
3612	 * ? make the entry as min size dummy entry)
3613	 *
3614	 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3615	 * return (1);
3616	 */
3617
3618	kname = key->name;
3619	klen = key->namlen;
3620
3621	ih = (struct idtentry *) & p->slot[si];
3622	si = ih->next;
3623	name = ih->name;
3624	namlen = ih->namlen;
3625	len = min(namlen, DTIHDRDATALEN);
3626
3627	/* compare with head/only segment */
3628	len = min(klen, len);
3629	if ((rc = UniStrncmp_le(kname, name, len)))
3630		return rc;
3631
3632	klen -= len;
3633	namlen -= len;
3634
3635	/* compare with additional segment(s) */
3636	kname += len;
3637	while (klen > 0 && namlen > 0) {
3638		/* compare with next name segment */
3639		t = (struct dtslot *) & p->slot[si];
3640		len = min(namlen, DTSLOTDATALEN);
3641		len = min(klen, len);
3642		name = t->name;
3643		if ((rc = UniStrncmp_le(kname, name, len)))
3644			return rc;
3645
3646		klen -= len;
3647		namlen -= len;
3648		kname += len;
3649		si = t->next;
3650	}
3651
3652	return (klen - namlen);
3653}
3654
3655
3656
3657
3658/*
3659 *	ciCompare()
3660 *
3661 * function: compare search key with an (leaf/internal) entry
3662 *
3663 * return:
3664 *	< 0 if k is < record
3665 *	= 0 if k is = record
3666 *	> 0 if k is > record
3667 */
3668static int ciCompare(struct component_name * key,	/* search key */
3669		     dtpage_t * p,	/* directory page */
3670		     int si,	/* entry slot index */
3671		     int flag)
3672{
3673	wchar_t *kname, x;
3674	__le16 *name;
3675	int klen, namlen, len, rc;
3676	struct ldtentry *lh;
3677	struct idtentry *ih;
3678	struct dtslot *t;
3679	int i;
3680
3681	/*
3682	 * force the left-most key on internal pages, at any level of
3683	 * the tree, to be less than any search key.
3684	 * this obviates having to update the leftmost key on an internal
3685	 * page when the user inserts a new key in the tree smaller than
3686	 * anything that has been stored.
3687	 *
3688	 * (? if/when dtSearch() narrows down to 1st entry (index = 0),
3689	 * at any internal page at any level of the tree,
3690	 * it descends to child of the entry anyway -
3691	 * ? make the entry as min size dummy entry)
3692	 *
3693	 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
3694	 * return (1);
3695	 */
3696
3697	kname = key->name;
3698	klen = key->namlen;
3699
3700	/*
3701	 * leaf page entry
3702	 */
3703	if (p->header.flag & BT_LEAF) {
3704		lh = (struct ldtentry *) & p->slot[si];
3705		si = lh->next;
3706		name = lh->name;
3707		namlen = lh->namlen;
3708		if (flag & JFS_DIR_INDEX)
3709			len = min(namlen, DTLHDRDATALEN);
3710		else
3711			len = min(namlen, DTLHDRDATALEN_LEGACY);
3712	}
3713	/*
3714	 * internal page entry
3715	 */
3716	else {
3717		ih = (struct idtentry *) & p->slot[si];
3718		si = ih->next;
3719		name = ih->name;
3720		namlen = ih->namlen;
3721		len = min(namlen, DTIHDRDATALEN);
3722	}
3723
3724	/* compare with head/only segment */
3725	len = min(klen, len);
3726	for (i = 0; i < len; i++, kname++, name++) {
3727		/* only uppercase if case-insensitive support is on */
3728		if ((flag & JFS_OS2) == JFS_OS2)
3729			x = UniToupper(le16_to_cpu(*name));
3730		else
3731			x = le16_to_cpu(*name);
3732		if ((rc = *kname - x))
3733			return rc;
3734	}
3735
3736	klen -= len;
3737	namlen -= len;
3738
3739	/* compare with additional segment(s) */
3740	while (klen > 0 && namlen > 0) {
3741		/* compare with next name segment */
3742		t = (struct dtslot *) & p->slot[si];
3743		len = min(namlen, DTSLOTDATALEN);
3744		len = min(klen, len);
3745		name = t->name;
3746		for (i = 0; i < len; i++, kname++, name++) {
3747			/* only uppercase if case-insensitive support is on */
3748			if ((flag & JFS_OS2) == JFS_OS2)
3749				x = UniToupper(le16_to_cpu(*name));
3750			else
3751				x = le16_to_cpu(*name);
3752
3753			if ((rc = *kname - x))
3754				return rc;
3755		}
3756
3757		klen -= len;
3758		namlen -= len;
3759		si = t->next;
3760	}
3761
3762	return (klen - namlen);
3763}
3764
3765
3766/*
3767 *	ciGetLeafPrefixKey()
3768 *
3769 * function: compute prefix of suffix compression
3770 *	     from two adjacent leaf entries
3771 *	     across page boundary
3772 *
3773 * return: non-zero on error
3774 *
3775 */
3776static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
3777			       int ri, struct component_name * key, int flag)
3778{
3779	int klen, namlen;
3780	wchar_t *pl, *pr, *kname;
3781	struct component_name lkey;
3782	struct component_name rkey;
3783
3784	lkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
3785					GFP_KERNEL);
3786	if (lkey.name == NULL)
3787		return -ENOMEM;
3788
3789	rkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
3790					GFP_KERNEL);
3791	if (rkey.name == NULL) {
3792		kfree(lkey.name);
3793		return -ENOMEM;
3794	}
3795
3796	/* get left and right key */
3797	dtGetKey(lp, li, &lkey, flag);
3798	lkey.name[lkey.namlen] = 0;
3799
3800	if ((flag & JFS_OS2) == JFS_OS2)
3801		ciToUpper(&lkey);
3802
3803	dtGetKey(rp, ri, &rkey, flag);
3804	rkey.name[rkey.namlen] = 0;
3805
3806
3807	if ((flag & JFS_OS2) == JFS_OS2)
3808		ciToUpper(&rkey);
3809
3810	/* compute prefix */
3811	klen = 0;
3812	kname = key->name;
3813	namlen = min(lkey.namlen, rkey.namlen);
3814	for (pl = lkey.name, pr = rkey.name;
3815	     namlen; pl++, pr++, namlen--, klen++, kname++) {
3816		*kname = *pr;
3817		if (*pl != *pr) {
3818			key->namlen = klen + 1;
3819			goto free_names;
3820		}
3821	}
3822
3823	/* l->namlen <= r->namlen since l <= r */
3824	if (lkey.namlen < rkey.namlen) {
3825		*kname = *pr;
3826		key->namlen = klen + 1;
3827	} else			/* l->namelen == r->namelen */
3828		key->namlen = klen;
3829
3830free_names:
3831	kfree(lkey.name);
3832	kfree(rkey.name);
3833	return 0;
3834}
3835
3836
3837
3838/*
3839 *	dtGetKey()
3840 *
3841 * function: get key of the entry
3842 */
3843static void dtGetKey(dtpage_t * p, int i,	/* entry index */
3844		     struct component_name * key, int flag)
3845{
3846	int si;
3847	s8 *stbl;
3848	struct ldtentry *lh;
3849	struct idtentry *ih;
3850	struct dtslot *t;
3851	int namlen, len;
3852	wchar_t *kname;
3853	__le16 *name;
3854
3855	/* get entry */
3856	stbl = DT_GETSTBL(p);
3857	si = stbl[i];
3858	if (p->header.flag & BT_LEAF) {
3859		lh = (struct ldtentry *) & p->slot[si];
3860		si = lh->next;
3861		namlen = lh->namlen;
3862		name = lh->name;
3863		if (flag & JFS_DIR_INDEX)
3864			len = min(namlen, DTLHDRDATALEN);
3865		else
3866			len = min(namlen, DTLHDRDATALEN_LEGACY);
3867	} else {
3868		ih = (struct idtentry *) & p->slot[si];
3869		si = ih->next;
3870		namlen = ih->namlen;
3871		name = ih->name;
3872		len = min(namlen, DTIHDRDATALEN);
3873	}
3874
3875	key->namlen = namlen;
3876	kname = key->name;
3877
3878	/*
3879	 * move head/only segment
3880	 */
3881	UniStrncpy_from_le(kname, name, len);
3882
3883	/*
3884	 * move additional segment(s)
3885	 */
3886	while (si >= 0) {
3887		/* get next segment */
3888		t = &p->slot[si];
3889		kname += len;
3890		namlen -= len;
3891		len = min(namlen, DTSLOTDATALEN);
3892		UniStrncpy_from_le(kname, t->name, len);
3893
3894		si = t->next;
3895	}
3896}
3897
3898
3899/*
3900 *	dtInsertEntry()
3901 *
3902 * function: allocate free slot(s) and
3903 *	     write a leaf/internal entry
3904 *
3905 * return: entry slot index
3906 */
3907static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
3908			  ddata_t * data, struct dt_lock ** dtlock)
3909{
3910	struct dtslot *h, *t;
3911	struct ldtentry *lh = NULL;
3912	struct idtentry *ih = NULL;
3913	int hsi, fsi, klen, len, nextindex;
3914	wchar_t *kname;
3915	__le16 *name;
3916	s8 *stbl;
3917	pxd_t *xd;
3918	struct dt_lock *dtlck = *dtlock;
3919	struct lv *lv;
3920	int xsi, n;
3921	s64 bn = 0;
3922	struct metapage *mp = NULL;
3923
3924	klen = key->namlen;
3925	kname = key->name;
3926
3927	/* allocate a free slot */
3928	hsi = fsi = p->header.freelist;
3929	h = &p->slot[fsi];
3930	p->header.freelist = h->next;
3931	--p->header.freecnt;
3932
3933	/* open new linelock */
3934	if (dtlck->index >= dtlck->maxcnt)
3935		dtlck = (struct dt_lock *) txLinelock(dtlck);
3936
3937	lv = & dtlck->lv[dtlck->index];
3938	lv->offset = hsi;
3939
3940	/* write head/only segment */
3941	if (p->header.flag & BT_LEAF) {
3942		lh = (struct ldtentry *) h;
3943		lh->next = h->next;
3944		lh->inumber = cpu_to_le32(data->leaf.ino);
3945		lh->namlen = klen;
3946		name = lh->name;
3947		if (data->leaf.ip) {
3948			len = min(klen, DTLHDRDATALEN);
3949			if (!(p->header.flag & BT_ROOT))
3950				bn = addressPXD(&p->header.self);
3951			lh->index = cpu_to_le32(add_index(data->leaf.tid,
3952							  data->leaf.ip,
3953							  bn, index));
3954		} else
3955			len = min(klen, DTLHDRDATALEN_LEGACY);
3956	} else {
3957		ih = (struct idtentry *) h;
3958		ih->next = h->next;
3959		xd = (pxd_t *) ih;
3960		*xd = data->xd;
3961		ih->namlen = klen;
3962		name = ih->name;
3963		len = min(klen, DTIHDRDATALEN);
3964	}
3965
3966	UniStrncpy_to_le(name, kname, len);
3967
3968	n = 1;
3969	xsi = hsi;
3970
3971	/* write additional segment(s) */
3972	t = h;
3973	klen -= len;
3974	while (klen) {
3975		/* get free slot */
3976		fsi = p->header.freelist;
3977		t = &p->slot[fsi];
3978		p->header.freelist = t->next;
3979		--p->header.freecnt;
3980
3981		/* is next slot contiguous ? */
3982		if (fsi != xsi + 1) {
3983			/* close current linelock */
3984			lv->length = n;
3985			dtlck->index++;
3986
3987			/* open new linelock */
3988			if (dtlck->index < dtlck->maxcnt)
3989				lv++;
3990			else {
3991				dtlck = (struct dt_lock *) txLinelock(dtlck);
3992				lv = & dtlck->lv[0];
3993			}
3994
3995			lv->offset = fsi;
3996			n = 0;
3997		}
3998
3999		kname += len;
4000		len = min(klen, DTSLOTDATALEN);
4001		UniStrncpy_to_le(t->name, kname, len);
4002
4003		n++;
4004		xsi = fsi;
4005		klen -= len;
4006	}
4007
4008	/* close current linelock */
4009	lv->length = n;
4010	dtlck->index++;
4011
4012	*dtlock = dtlck;
4013
4014	/* terminate last/only segment */
4015	if (h == t) {
4016		/* single segment entry */
4017		if (p->header.flag & BT_LEAF)
4018			lh->next = -1;
4019		else
4020			ih->next = -1;
4021	} else
4022		/* multi-segment entry */
4023		t->next = -1;
4024
4025	/* if insert into middle, shift right succeeding entries in stbl */
4026	stbl = DT_GETSTBL(p);
4027	nextindex = p->header.nextindex;
4028	if (index < nextindex) {
4029		memmove(stbl + index + 1, stbl + index, nextindex - index);
4030
4031		if ((p->header.flag & BT_LEAF) && data->leaf.ip) {
4032			s64 lblock;
4033
4034			/*
4035			 * Need to update slot number for entries that moved
4036			 * in the stbl
4037			 */
4038			mp = NULL;
4039			for (n = index + 1; n <= nextindex; n++) {
4040				lh = (struct ldtentry *) & (p->slot[stbl[n]]);
4041				modify_index(data->leaf.tid, data->leaf.ip,
4042					     le32_to_cpu(lh->index), bn, n,
4043					     &mp, &lblock);
4044			}
4045			if (mp)
4046				release_metapage(mp);
4047		}
4048	}
4049
4050	stbl[index] = hsi;
4051
4052	/* advance next available entry index of stbl */
4053	++p->header.nextindex;
4054}
4055
4056
4057/*
4058 *	dtMoveEntry()
4059 *
4060 * function: move entries from split/left page to new/right page
4061 *
4062 *	nextindex of dst page and freelist/freecnt of both pages
4063 *	are updated.
4064 */
4065static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
4066			struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
4067			int do_index)
4068{
4069	int ssi, next;		/* src slot index */
4070	int di;			/* dst entry index */
4071	int dsi;		/* dst slot index */
4072	s8 *sstbl, *dstbl;	/* sorted entry table */
4073	int snamlen, len;
4074	struct ldtentry *slh, *dlh = NULL;
4075	struct idtentry *sih, *dih = NULL;
4076	struct dtslot *h, *s, *d;
4077	struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock;
4078	struct lv *slv, *dlv;
4079	int xssi, ns, nd;
4080	int sfsi;
4081
4082	sstbl = (s8 *) & sp->slot[sp->header.stblindex];
4083	dstbl = (s8 *) & dp->slot[dp->header.stblindex];
4084
4085	dsi = dp->header.freelist;	/* first (whole page) free slot */
4086	sfsi = sp->header.freelist;
4087
4088	/* linelock destination entry slot */
4089	dlv = & ddtlck->lv[ddtlck->index];
4090	dlv->offset = dsi;
4091
4092	/* linelock source entry slot */
4093	slv = & sdtlck->lv[sdtlck->index];
4094	slv->offset = sstbl[si];
4095	xssi = slv->offset - 1;
4096
4097	/*
4098	 * move entries
4099	 */
4100	ns = nd = 0;
4101	for (di = 0; si < sp->header.nextindex; si++, di++) {
4102		ssi = sstbl[si];
4103		dstbl[di] = dsi;
4104
4105		/* is next slot contiguous ? */
4106		if (ssi != xssi + 1) {
4107			/* close current linelock */
4108			slv->length = ns;
4109			sdtlck->index++;
4110
4111			/* open new linelock */
4112			if (sdtlck->index < sdtlck->maxcnt)
4113				slv++;
4114			else {
4115				sdtlck = (struct dt_lock *) txLinelock(sdtlck);
4116				slv = & sdtlck->lv[0];
4117			}
4118
4119			slv->offset = ssi;
4120			ns = 0;
4121		}
4122
4123		/*
4124		 * move head/only segment of an entry
4125		 */
4126		/* get dst slot */
4127		h = d = &dp->slot[dsi];
4128
4129		/* get src slot and move */
4130		s = &sp->slot[ssi];
4131		if (sp->header.flag & BT_LEAF) {
4132			/* get source entry */
4133			slh = (struct ldtentry *) s;
4134			dlh = (struct ldtentry *) h;
4135			snamlen = slh->namlen;
4136
4137			if (do_index) {
4138				len = min(snamlen, DTLHDRDATALEN);
4139				dlh->index = slh->index; /* little-endian */
4140			} else
4141				len = min(snamlen, DTLHDRDATALEN_LEGACY);
4142
4143			memcpy(dlh, slh, 6 + len * 2);
4144
4145			next = slh->next;
4146
4147			/* update dst head/only segment next field */
4148			dsi++;
4149			dlh->next = dsi;
4150		} else {
4151			sih = (struct idtentry *) s;
4152			snamlen = sih->namlen;
4153
4154			len = min(snamlen, DTIHDRDATALEN);
4155			dih = (struct idtentry *) h;
4156			memcpy(dih, sih, 10 + len * 2);
4157			next = sih->next;
4158
4159			dsi++;
4160			dih->next = dsi;
4161		}
4162
4163		/* free src head/only segment */
4164		s->next = sfsi;
4165		s->cnt = 1;
4166		sfsi = ssi;
4167
4168		ns++;
4169		nd++;
4170		xssi = ssi;
4171
4172		/*
4173		 * move additional segment(s) of the entry
4174		 */
4175		snamlen -= len;
4176		while ((ssi = next) >= 0) {
4177			/* is next slot contiguous ? */
4178			if (ssi != xssi + 1) {
4179				/* close current linelock */
4180				slv->length = ns;
4181				sdtlck->index++;
4182
4183				/* open new linelock */
4184				if (sdtlck->index < sdtlck->maxcnt)
4185					slv++;
4186				else {
4187					sdtlck =
4188					    (struct dt_lock *)
4189					    txLinelock(sdtlck);
4190					slv = & sdtlck->lv[0];
4191				}
4192
4193				slv->offset = ssi;
4194				ns = 0;
4195			}
4196
4197			/* get next source segment */
4198			s = &sp->slot[ssi];
4199
4200			/* get next destination free slot */
4201			d++;
4202
4203			len = min(snamlen, DTSLOTDATALEN);
4204			UniStrncpy_le(d->name, s->name, len);
4205
4206			ns++;
4207			nd++;
4208			xssi = ssi;
4209
4210			dsi++;
4211			d->next = dsi;
4212
4213			/* free source segment */
4214			next = s->next;
4215			s->next = sfsi;
4216			s->cnt = 1;
4217			sfsi = ssi;
4218
4219			snamlen -= len;
4220		}		/* end while */
4221
4222		/* terminate dst last/only segment */
4223		if (h == d) {
4224			/* single segment entry */
4225			if (dp->header.flag & BT_LEAF)
4226				dlh->next = -1;
4227			else
4228				dih->next = -1;
4229		} else
4230			/* multi-segment entry */
4231			d->next = -1;
4232	}			/* end for */
4233
4234	/* close current linelock */
4235	slv->length = ns;
4236	sdtlck->index++;
4237	*sdtlock = sdtlck;
4238
4239	dlv->length = nd;
4240	ddtlck->index++;
4241	*ddtlock = ddtlck;
4242
4243	/* update source header */
4244	sp->header.freelist = sfsi;
4245	sp->header.freecnt += nd;
4246
4247	/* update destination header */
4248	dp->header.nextindex = di;
4249
4250	dp->header.freelist = dsi;
4251	dp->header.freecnt -= nd;
4252}
4253
4254
4255/*
4256 *	dtDeleteEntry()
4257 *
4258 * function: free a (leaf/internal) entry
4259 *
4260 * log freelist header, stbl, and each segment slot of entry
4261 * (even though last/only segment next field is modified,
4262 * physical image logging requires all segment slots of
4263 * the entry logged to avoid applying previous updates
4264 * to the same slots)
4265 */
4266static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock)
4267{
4268	int fsi;		/* free entry slot index */
4269	s8 *stbl;
4270	struct dtslot *t;
4271	int si, freecnt;
4272	struct dt_lock *dtlck = *dtlock;
4273	struct lv *lv;
4274	int xsi, n;
4275
4276	/* get free entry slot index */
4277	stbl = DT_GETSTBL(p);
4278	fsi = stbl[fi];
4279
4280	/* open new linelock */
4281	if (dtlck->index >= dtlck->maxcnt)
4282		dtlck = (struct dt_lock *) txLinelock(dtlck);
4283	lv = & dtlck->lv[dtlck->index];
4284
4285	lv->offset = fsi;
4286
4287	/* get the head/only segment */
4288	t = &p->slot[fsi];
4289	if (p->header.flag & BT_LEAF)
4290		si = ((struct ldtentry *) t)->next;
4291	else
4292		si = ((struct idtentry *) t)->next;
4293	t->next = si;
4294	t->cnt = 1;
4295
4296	n = freecnt = 1;
4297	xsi = fsi;
4298
4299	/* find the last/only segment */
4300	while (si >= 0) {
4301		/* is next slot contiguous ? */
4302		if (si != xsi + 1) {
4303			/* close current linelock */
4304			lv->length = n;
4305			dtlck->index++;
4306
4307			/* open new linelock */
4308			if (dtlck->index < dtlck->maxcnt)
4309				lv++;
4310			else {
4311				dtlck = (struct dt_lock *) txLinelock(dtlck);
4312				lv = & dtlck->lv[0];
4313			}
4314
4315			lv->offset = si;
4316			n = 0;
4317		}
4318
4319		n++;
4320		xsi = si;
4321		freecnt++;
4322
4323		t = &p->slot[si];
4324		t->cnt = 1;
4325		si = t->next;
4326	}
4327
4328	/* close current linelock */
4329	lv->length = n;
4330	dtlck->index++;
4331
4332	*dtlock = dtlck;
4333
4334	/* update freelist */
4335	t->next = p->header.freelist;
4336	p->header.freelist = fsi;
4337	p->header.freecnt += freecnt;
4338
4339	/* if delete from middle,
4340	 * shift left the succedding entries in the stbl
4341	 */
4342	si = p->header.nextindex;
4343	if (fi < si - 1)
4344		memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1);
4345
4346	p->header.nextindex--;
4347}
4348
4349
4350/*
4351 *	dtTruncateEntry()
4352 *
4353 * function: truncate a (leaf/internal) entry
4354 *
4355 * log freelist header, stbl, and each segment slot of entry
4356 * (even though last/only segment next field is modified,
4357 * physical image logging requires all segment slots of
4358 * the entry logged to avoid applying previous updates
4359 * to the same slots)
4360 */
4361static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock)
4362{
4363	int tsi;		/* truncate entry slot index */
4364	s8 *stbl;
4365	struct dtslot *t;
4366	int si, freecnt;
4367	struct dt_lock *dtlck = *dtlock;
4368	struct lv *lv;
4369	int fsi, xsi, n;
4370
4371	/* get free entry slot index */
4372	stbl = DT_GETSTBL(p);
4373	tsi = stbl[ti];
4374
4375	/* open new linelock */
4376	if (dtlck->index >= dtlck->maxcnt)
4377		dtlck = (struct dt_lock *) txLinelock(dtlck);
4378	lv = & dtlck->lv[dtlck->index];
4379
4380	lv->offset = tsi;
4381
4382	/* get the head/only segment */
4383	t = &p->slot[tsi];
4384	ASSERT(p->header.flag & BT_INTERNAL);
4385	((struct idtentry *) t)->namlen = 0;
4386	si = ((struct idtentry *) t)->next;
4387	((struct idtentry *) t)->next = -1;
4388
4389	n = 1;
4390	freecnt = 0;
4391	fsi = si;
4392	xsi = tsi;
4393
4394	/* find the last/only segment */
4395	while (si >= 0) {
4396		/* is next slot contiguous ? */
4397		if (si != xsi + 1) {
4398			/* close current linelock */
4399			lv->length = n;
4400			dtlck->index++;
4401
4402			/* open new linelock */
4403			if (dtlck->index < dtlck->maxcnt)
4404				lv++;
4405			else {
4406				dtlck = (struct dt_lock *) txLinelock(dtlck);
4407				lv = & dtlck->lv[0];
4408			}
4409
4410			lv->offset = si;
4411			n = 0;
4412		}
4413
4414		n++;
4415		xsi = si;
4416		freecnt++;
4417
4418		t = &p->slot[si];
4419		t->cnt = 1;
4420		si = t->next;
4421	}
4422
4423	/* close current linelock */
4424	lv->length = n;
4425	dtlck->index++;
4426
4427	*dtlock = dtlck;
4428
4429	/* update freelist */
4430	if (freecnt == 0)
4431		return;
4432	t->next = p->header.freelist;
4433	p->header.freelist = fsi;
4434	p->header.freecnt += freecnt;
4435}
4436
4437
4438/*
4439 *	dtLinelockFreelist()
4440 */
4441static void dtLinelockFreelist(dtpage_t * p,	/* directory page */
4442			       int m,	/* max slot index */
4443			       struct dt_lock ** dtlock)
4444{
4445	int fsi;		/* free entry slot index */
4446	struct dtslot *t;
4447	int si;
4448	struct dt_lock *dtlck = *dtlock;
4449	struct lv *lv;
4450	int xsi, n;
4451
4452	/* get free entry slot index */
4453	fsi = p->header.freelist;
4454
4455	/* open new linelock */
4456	if (dtlck->index >= dtlck->maxcnt)
4457		dtlck = (struct dt_lock *) txLinelock(dtlck);
4458	lv = & dtlck->lv[dtlck->index];
4459
4460	lv->offset = fsi;
4461
4462	n = 1;
4463	xsi = fsi;
4464
4465	t = &p->slot[fsi];
4466	si = t->next;
4467
4468	/* find the last/only segment */
4469	while (si < m && si >= 0) {
4470		/* is next slot contiguous ? */
4471		if (si != xsi + 1) {
4472			/* close current linelock */
4473			lv->length = n;
4474			dtlck->index++;
4475
4476			/* open new linelock */
4477			if (dtlck->index < dtlck->maxcnt)
4478				lv++;
4479			else {
4480				dtlck = (struct dt_lock *) txLinelock(dtlck);
4481				lv = & dtlck->lv[0];
4482			}
4483
4484			lv->offset = si;
4485			n = 0;
4486		}
4487
4488		n++;
4489		xsi = si;
4490
4491		t = &p->slot[si];
4492		si = t->next;
4493	}
4494
4495	/* close current linelock */
4496	lv->length = n;
4497	dtlck->index++;
4498
4499	*dtlock = dtlck;
4500}
4501
4502
4503/*
4504 * NAME: dtModify
4505 *
4506 * FUNCTION: Modify the inode number part of a directory entry
4507 *
4508 * PARAMETERS:
4509 *	tid	- Transaction id
4510 *	ip	- Inode of parent directory
4511 *	key	- Name of entry to be modified
4512 *	orig_ino	- Original inode number expected in entry
4513 *	new_ino	- New inode number to put into entry
4514 *	flag	- JFS_RENAME
4515 *
4516 * RETURNS:
4517 *	-ESTALE	- If entry found does not match orig_ino passed in
4518 *	-ENOENT	- If no entry can be found to match key
4519 *	0	- If successfully modified entry
4520 */
4521int dtModify(tid_t tid, struct inode *ip,
4522	 struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag)
4523{
4524	int rc;
4525	s64 bn;
4526	struct metapage *mp;
4527	dtpage_t *p;
4528	int index;
4529	struct btstack btstack;
4530	struct tlock *tlck;
4531	struct dt_lock *dtlck;
4532	struct lv *lv;
4533	s8 *stbl;
4534	int entry_si;		/* entry slot index */
4535	struct ldtentry *entry;
4536
4537	/*
4538	 *	search for the entry to modify:
4539	 *
4540	 * dtSearch() returns (leaf page pinned, index at which to modify).
4541	 */
4542	if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag)))
4543		return rc;
4544
4545	/* retrieve search result */
4546	DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
4547
4548	BT_MARK_DIRTY(mp, ip);
4549	/*
4550	 * acquire a transaction lock on the leaf page of named entry
4551	 */
4552	tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
4553	dtlck = (struct dt_lock *) & tlck->lock;
4554
4555	/* get slot index of the entry */
4556	stbl = DT_GETSTBL(p);
4557	entry_si = stbl[index];
4558
4559	/* linelock entry */
4560	ASSERT(dtlck->index == 0);
4561	lv = & dtlck->lv[0];
4562	lv->offset = entry_si;
4563	lv->length = 1;
4564	dtlck->index++;
4565
4566	/* get the head/only segment */
4567	entry = (struct ldtentry *) & p->slot[entry_si];
4568
4569	/* substitute the inode number of the entry */
4570	entry->inumber = cpu_to_le32(new_ino);
4571
4572	/* unpin the leaf page */
4573	DT_PUTPAGE(mp);
4574
4575	return 0;
4576}
4577