1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18#include <linux/log2.h>
19
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_format.h"
23#include "xfs_log_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_mount.h"
26#include "xfs_inode.h"
27#include "xfs_trans.h"
28#include "xfs_inode_item.h"
29#include "xfs_bmap_btree.h"
30#include "xfs_bmap.h"
31#include "xfs_error.h"
32#include "xfs_trace.h"
33#include "xfs_attr_sf.h"
34
35kmem_zone_t *xfs_ifork_zone;
36
37STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
38STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
39STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
40
#ifdef DEBUG
/*
 * Debug-only sanity check of the first nrecs in-core extent records of
 * the given fork.  Every record is unpacked; when the caller says the
 * inode uses the XFS_EXTFMT_NOSTATE extent format, each record must be
 * in the XFS_EXT_NORM state.
 *
 * ifp   -- fork whose in-core extent records are examined
 * nrecs -- number of records to examine, starting at index 0
 * fmt   -- extent format of the owning inode
 */
void
xfs_validate_extents(
	xfs_ifork_t		*ifp,
	int			nrecs,
	xfs_exntfmt_t		fmt)
{
	xfs_bmbt_rec_host_t	*src;
	xfs_bmbt_rec_host_t	copy;
	xfs_bmbt_irec_t		irec;
	int			n = 0;

	while (n < nrecs) {
		src = xfs_iext_get_ext(ifp, n);

		/* Take a local copy so the record is unpacked from one snapshot. */
		copy.l0 = get_unaligned(&src->l0);
		copy.l1 = get_unaligned(&src->l1);
		xfs_bmbt_get_all(&copy, &irec);

		if (fmt == XFS_EXTFMT_NOSTATE)
			ASSERT(irec.br_state == XFS_EXT_NORM);
		n++;
	}
}
#else /* DEBUG */
#define xfs_validate_extents(ifp, nrecs, fmt)
#endif /* DEBUG */
68
69
70/*
71 * Move inode type and inode format specific information from the
72 * on-disk inode to the in-core inode.  For fifos, devs, and sockets
73 * this means set if_rdev to the proper value.  For files, directories,
74 * and symlinks this means to bring in the in-line data or extent
75 * pointers.  For a file in B-tree format, only the root is immediately
76 * brought in-core.  The rest will be in-lined in if_extents when it
77 * is first referenced (see xfs_iread_extents()).
78 */
79int
80xfs_iformat_fork(
81	xfs_inode_t		*ip,
82	xfs_dinode_t		*dip)
83{
84	xfs_attr_shortform_t	*atp;
85	int			size;
86	int			error = 0;
87	xfs_fsize_t             di_size;
88
89	if (unlikely(be32_to_cpu(dip->di_nextents) +
90		     be16_to_cpu(dip->di_anextents) >
91		     be64_to_cpu(dip->di_nblocks))) {
92		xfs_warn(ip->i_mount,
93			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
94			(unsigned long long)ip->i_ino,
95			(int)(be32_to_cpu(dip->di_nextents) +
96			      be16_to_cpu(dip->di_anextents)),
97			(unsigned long long)
98				be64_to_cpu(dip->di_nblocks));
99		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
100				     ip->i_mount, dip);
101		return -EFSCORRUPTED;
102	}
103
104	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
105		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
106			(unsigned long long)ip->i_ino,
107			dip->di_forkoff);
108		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
109				     ip->i_mount, dip);
110		return -EFSCORRUPTED;
111	}
112
113	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
114		     !ip->i_mount->m_rtdev_targp)) {
115		xfs_warn(ip->i_mount,
116			"corrupt dinode %Lu, has realtime flag set.",
117			ip->i_ino);
118		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
119				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
120		return -EFSCORRUPTED;
121	}
122
123	switch (ip->i_d.di_mode & S_IFMT) {
124	case S_IFIFO:
125	case S_IFCHR:
126	case S_IFBLK:
127	case S_IFSOCK:
128		if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
129			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
130					      ip->i_mount, dip);
131			return -EFSCORRUPTED;
132		}
133		ip->i_d.di_size = 0;
134		ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
135		break;
136
137	case S_IFREG:
138	case S_IFLNK:
139	case S_IFDIR:
140		switch (dip->di_format) {
141		case XFS_DINODE_FMT_LOCAL:
142			/*
143			 * no local regular files yet
144			 */
145			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
146				xfs_warn(ip->i_mount,
147			"corrupt inode %Lu (local format for regular file).",
148					(unsigned long long) ip->i_ino);
149				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
150						     XFS_ERRLEVEL_LOW,
151						     ip->i_mount, dip);
152				return -EFSCORRUPTED;
153			}
154
155			di_size = be64_to_cpu(dip->di_size);
156			if (unlikely(di_size < 0 ||
157				     di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
158				xfs_warn(ip->i_mount,
159			"corrupt inode %Lu (bad size %Ld for local inode).",
160					(unsigned long long) ip->i_ino,
161					(long long) di_size);
162				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
163						     XFS_ERRLEVEL_LOW,
164						     ip->i_mount, dip);
165				return -EFSCORRUPTED;
166			}
167
168			size = (int)di_size;
169			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
170			break;
171		case XFS_DINODE_FMT_EXTENTS:
172			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
173			break;
174		case XFS_DINODE_FMT_BTREE:
175			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
176			break;
177		default:
178			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
179					 ip->i_mount);
180			return -EFSCORRUPTED;
181		}
182		break;
183
184	default:
185		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
186		return -EFSCORRUPTED;
187	}
188	if (error) {
189		return error;
190	}
191	if (!XFS_DFORK_Q(dip))
192		return 0;
193
194	ASSERT(ip->i_afp == NULL);
195	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
196
197	switch (dip->di_aformat) {
198	case XFS_DINODE_FMT_LOCAL:
199		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
200		size = be16_to_cpu(atp->hdr.totsize);
201
202		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
203			xfs_warn(ip->i_mount,
204				"corrupt inode %Lu (bad attr fork size %Ld).",
205				(unsigned long long) ip->i_ino,
206				(long long) size);
207			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
208					     XFS_ERRLEVEL_LOW,
209					     ip->i_mount, dip);
210			return -EFSCORRUPTED;
211		}
212
213		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
214		break;
215	case XFS_DINODE_FMT_EXTENTS:
216		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
217		break;
218	case XFS_DINODE_FMT_BTREE:
219		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
220		break;
221	default:
222		error = -EFSCORRUPTED;
223		break;
224	}
225	if (error) {
226		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
227		ip->i_afp = NULL;
228		xfs_idestroy_fork(ip, XFS_DATA_FORK);
229	}
230	return error;
231}
232
233/*
234 * The file is in-lined in the on-disk inode.
235 * If it fits into if_inline_data, then copy
236 * it there, otherwise allocate a buffer for it
237 * and copy the data there.  Either way, set
238 * if_data to point at the data.
239 * If we allocate a buffer for the data, make
240 * sure that its size is a multiple of 4 and
241 * record the real size in i_real_bytes.
242 */
243STATIC int
244xfs_iformat_local(
245	xfs_inode_t	*ip,
246	xfs_dinode_t	*dip,
247	int		whichfork,
248	int		size)
249{
250	xfs_ifork_t	*ifp;
251	int		real_size;
252
253	/*
254	 * If the size is unreasonable, then something
255	 * is wrong and we just bail out rather than crash in
256	 * kmem_alloc() or memcpy() below.
257	 */
258	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
259		xfs_warn(ip->i_mount,
260	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
261			(unsigned long long) ip->i_ino, size,
262			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
263		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
264				     ip->i_mount, dip);
265		return -EFSCORRUPTED;
266	}
267	ifp = XFS_IFORK_PTR(ip, whichfork);
268	real_size = 0;
269	if (size == 0)
270		ifp->if_u1.if_data = NULL;
271	else if (size <= sizeof(ifp->if_u2.if_inline_data))
272		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
273	else {
274		real_size = roundup(size, 4);
275		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
276	}
277	ifp->if_bytes = size;
278	ifp->if_real_bytes = real_size;
279	if (size)
280		memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
281	ifp->if_flags &= ~XFS_IFEXTENTS;
282	ifp->if_flags |= XFS_IFINLINE;
283	return 0;
284}
285
286/*
287 * The file consists of a set of extents all
288 * of which fit into the on-disk inode.
289 * If there are few enough extents to fit into
290 * the if_inline_ext, then copy them there.
291 * Otherwise allocate a buffer for them and copy
292 * them into it.  Either way, set if_extents
293 * to point at the extents.
294 */
295STATIC int
296xfs_iformat_extents(
297	xfs_inode_t	*ip,
298	xfs_dinode_t	*dip,
299	int		whichfork)
300{
301	xfs_bmbt_rec_t	*dp;
302	xfs_ifork_t	*ifp;
303	int		nex;
304	int		size;
305	int		i;
306
307	ifp = XFS_IFORK_PTR(ip, whichfork);
308	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
309	size = nex * (uint)sizeof(xfs_bmbt_rec_t);
310
311	/*
312	 * If the number of extents is unreasonable, then something
313	 * is wrong and we just bail out rather than crash in
314	 * kmem_alloc() or memcpy() below.
315	 */
316	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
317		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
318			(unsigned long long) ip->i_ino, nex);
319		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
320				     ip->i_mount, dip);
321		return -EFSCORRUPTED;
322	}
323
324	ifp->if_real_bytes = 0;
325	if (nex == 0)
326		ifp->if_u1.if_extents = NULL;
327	else if (nex <= XFS_INLINE_EXTS)
328		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
329	else
330		xfs_iext_add(ifp, 0, nex);
331
332	ifp->if_bytes = size;
333	if (size) {
334		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
335		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
336		for (i = 0; i < nex; i++, dp++) {
337			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
338			ep->l0 = get_unaligned_be64(&dp->l0);
339			ep->l1 = get_unaligned_be64(&dp->l1);
340		}
341		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
342		if (whichfork != XFS_DATA_FORK ||
343			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
344				if (unlikely(xfs_check_nostate_extents(
345				    ifp, 0, nex))) {
346					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
347							 XFS_ERRLEVEL_LOW,
348							 ip->i_mount);
349					return -EFSCORRUPTED;
350				}
351	}
352	ifp->if_flags |= XFS_IFEXTENTS;
353	return 0;
354}
355
356/*
357 * The file has too many extents to fit into
358 * the inode, so they are in B-tree format.
359 * Allocate a buffer for the root of the B-tree
360 * and copy the root into it.  The i_extents
361 * field will remain NULL until all of the
362 * extents are read in (when they are needed).
363 */
364STATIC int
365xfs_iformat_btree(
366	xfs_inode_t		*ip,
367	xfs_dinode_t		*dip,
368	int			whichfork)
369{
370	struct xfs_mount	*mp = ip->i_mount;
371	xfs_bmdr_block_t	*dfp;
372	xfs_ifork_t		*ifp;
373	/* REFERENCED */
374	int			nrecs;
375	int			size;
376
377	ifp = XFS_IFORK_PTR(ip, whichfork);
378	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
379	size = XFS_BMAP_BROOT_SPACE(mp, dfp);
380	nrecs = be16_to_cpu(dfp->bb_numrecs);
381
382	/*
383	 * blow out if -- fork has less extents than can fit in
384	 * fork (fork shouldn't be a btree format), root btree
385	 * block has more records than can fit into the fork,
386	 * or the number of extents is greater than the number of
387	 * blocks.
388	 */
389	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
390					XFS_IFORK_MAXEXT(ip, whichfork) ||
391		     XFS_BMDR_SPACE_CALC(nrecs) >
392					XFS_DFORK_SIZE(dip, mp, whichfork) ||
393		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
394		xfs_warn(mp, "corrupt inode %Lu (btree).",
395					(unsigned long long) ip->i_ino);
396		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
397					 mp, dip);
398		return -EFSCORRUPTED;
399	}
400
401	ifp->if_broot_bytes = size;
402	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
403	ASSERT(ifp->if_broot != NULL);
404	/*
405	 * Copy and convert from the on-disk structure
406	 * to the in-memory structure.
407	 */
408	xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
409			 ifp->if_broot, size);
410	ifp->if_flags &= ~XFS_IFEXTENTS;
411	ifp->if_flags |= XFS_IFBROOT;
412
413	return 0;
414}
415
416/*
417 * Read in extents from a btree-format inode.
418 * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
419 */
420int
421xfs_iread_extents(
422	xfs_trans_t	*tp,
423	xfs_inode_t	*ip,
424	int		whichfork)
425{
426	int		error;
427	xfs_ifork_t	*ifp;
428	xfs_extnum_t	nextents;
429
430	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
431
432	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
433		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
434				 ip->i_mount);
435		return -EFSCORRUPTED;
436	}
437	nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
438	ifp = XFS_IFORK_PTR(ip, whichfork);
439
440	/*
441	 * We know that the size is valid (it's checked in iformat_btree)
442	 */
443	ifp->if_bytes = ifp->if_real_bytes = 0;
444	ifp->if_flags |= XFS_IFEXTENTS;
445	xfs_iext_add(ifp, 0, nextents);
446	error = xfs_bmap_read_extents(tp, ip, whichfork);
447	if (error) {
448		xfs_iext_destroy(ifp);
449		ifp->if_flags &= ~XFS_IFEXTENTS;
450		return error;
451	}
452	xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
453	return 0;
454}
455/*
456 * Reallocate the space for if_broot based on the number of records
457 * being added or deleted as indicated in rec_diff.  Move the records
458 * and pointers in if_broot to fit the new size.  When shrinking this
459 * will eliminate holes between the records and pointers created by
460 * the caller.  When growing this will create holes to be filled in
461 * by the caller.
462 *
463 * The caller must not request to add more records than would fit in
464 * the on-disk inode root.  If the if_broot is currently NULL, then
465 * if we are adding records, one will be allocated.  The caller must also
466 * not request that the number of records go below zero, although
467 * it can go to zero.
468 *
469 * ip -- the inode whose if_broot area is changing
470 * ext_diff -- the change in the number of records, positive or negative,
471 *	 requested for the if_broot array.
472 */
473void
474xfs_iroot_realloc(
475	xfs_inode_t		*ip,
476	int			rec_diff,
477	int			whichfork)
478{
479	struct xfs_mount	*mp = ip->i_mount;
480	int			cur_max;
481	xfs_ifork_t		*ifp;
482	struct xfs_btree_block	*new_broot;
483	int			new_max;
484	size_t			new_size;
485	char			*np;
486	char			*op;
487
488	/*
489	 * Handle the degenerate case quietly.
490	 */
491	if (rec_diff == 0) {
492		return;
493	}
494
495	ifp = XFS_IFORK_PTR(ip, whichfork);
496	if (rec_diff > 0) {
497		/*
498		 * If there wasn't any memory allocated before, just
499		 * allocate it now and get out.
500		 */
501		if (ifp->if_broot_bytes == 0) {
502			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
503			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
504			ifp->if_broot_bytes = (int)new_size;
505			return;
506		}
507
508		/*
509		 * If there is already an existing if_broot, then we need
510		 * to realloc() it and shift the pointers to their new
511		 * location.  The records don't change location because
512		 * they are kept butted up against the btree block header.
513		 */
514		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
515		new_max = cur_max + rec_diff;
516		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
517		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
518				XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
519				KM_SLEEP | KM_NOFS);
520		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
521						     ifp->if_broot_bytes);
522		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
523						     (int)new_size);
524		ifp->if_broot_bytes = (int)new_size;
525		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
526			XFS_IFORK_SIZE(ip, whichfork));
527		memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t));
528		return;
529	}
530
531	/*
532	 * rec_diff is less than 0.  In this case, we are shrinking the
533	 * if_broot buffer.  It must already exist.  If we go to zero
534	 * records, just get rid of the root and clear the status bit.
535	 */
536	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
537	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
538	new_max = cur_max + rec_diff;
539	ASSERT(new_max >= 0);
540	if (new_max > 0)
541		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
542	else
543		new_size = 0;
544	if (new_size > 0) {
545		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
546		/*
547		 * First copy over the btree block header.
548		 */
549		memcpy(new_broot, ifp->if_broot,
550			XFS_BMBT_BLOCK_LEN(ip->i_mount));
551	} else {
552		new_broot = NULL;
553		ifp->if_flags &= ~XFS_IFBROOT;
554	}
555
556	/*
557	 * Only copy the records and pointers if there are any.
558	 */
559	if (new_max > 0) {
560		/*
561		 * First copy the records.
562		 */
563		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
564		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
565		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
566
567		/*
568		 * Then copy the pointers.
569		 */
570		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
571						     ifp->if_broot_bytes);
572		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
573						     (int)new_size);
574		memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t));
575	}
576	kmem_free(ifp->if_broot);
577	ifp->if_broot = new_broot;
578	ifp->if_broot_bytes = (int)new_size;
579	if (ifp->if_broot)
580		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
581			XFS_IFORK_SIZE(ip, whichfork));
582	return;
583}
584
585
586/*
587 * This is called when the amount of space needed for if_data
588 * is increased or decreased.  The change in size is indicated by
589 * the number of bytes that need to be added or deleted in the
590 * byte_diff parameter.
591 *
592 * If the amount of space needed has decreased below the size of the
593 * inline buffer, then switch to using the inline buffer.  Otherwise,
594 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
595 * to what is needed.
596 *
597 * ip -- the inode whose if_data area is changing
598 * byte_diff -- the change in the number of bytes, positive or negative,
599 *	 requested for the if_data array.
600 */
601void
602xfs_idata_realloc(
603	xfs_inode_t	*ip,
604	int		byte_diff,
605	int		whichfork)
606{
607	xfs_ifork_t	*ifp;
608	int		new_size;
609	int		real_size;
610
611	if (byte_diff == 0) {
612		return;
613	}
614
615	ifp = XFS_IFORK_PTR(ip, whichfork);
616	new_size = (int)ifp->if_bytes + byte_diff;
617	ASSERT(new_size >= 0);
618
619	if (new_size == 0) {
620		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
621			kmem_free(ifp->if_u1.if_data);
622		}
623		ifp->if_u1.if_data = NULL;
624		real_size = 0;
625	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
626		/*
627		 * If the valid extents/data can fit in if_inline_ext/data,
628		 * copy them from the malloc'd vector and free it.
629		 */
630		if (ifp->if_u1.if_data == NULL) {
631			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
632		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
633			ASSERT(ifp->if_real_bytes != 0);
634			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
635			      new_size);
636			kmem_free(ifp->if_u1.if_data);
637			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
638		}
639		real_size = 0;
640	} else {
641		/*
642		 * Stuck with malloc/realloc.
643		 * For inline data, the underlying buffer must be
644		 * a multiple of 4 bytes in size so that it can be
645		 * logged and stay on word boundaries.  We enforce
646		 * that here.
647		 */
648		real_size = roundup(new_size, 4);
649		if (ifp->if_u1.if_data == NULL) {
650			ASSERT(ifp->if_real_bytes == 0);
651			ifp->if_u1.if_data = kmem_alloc(real_size,
652							KM_SLEEP | KM_NOFS);
653		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
654			/*
655			 * Only do the realloc if the underlying size
656			 * is really changing.
657			 */
658			if (ifp->if_real_bytes != real_size) {
659				ifp->if_u1.if_data =
660					kmem_realloc(ifp->if_u1.if_data,
661							real_size,
662							ifp->if_real_bytes,
663							KM_SLEEP | KM_NOFS);
664			}
665		} else {
666			ASSERT(ifp->if_real_bytes == 0);
667			ifp->if_u1.if_data = kmem_alloc(real_size,
668							KM_SLEEP | KM_NOFS);
669			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
670				ifp->if_bytes);
671		}
672	}
673	ifp->if_real_bytes = real_size;
674	ifp->if_bytes = new_size;
675	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
676}
677
678void
679xfs_idestroy_fork(
680	xfs_inode_t	*ip,
681	int		whichfork)
682{
683	xfs_ifork_t	*ifp;
684
685	ifp = XFS_IFORK_PTR(ip, whichfork);
686	if (ifp->if_broot != NULL) {
687		kmem_free(ifp->if_broot);
688		ifp->if_broot = NULL;
689	}
690
691	/*
692	 * If the format is local, then we can't have an extents
693	 * array so just look for an inline data array.  If we're
694	 * not local then we may or may not have an extents list,
695	 * so check and free it up if we do.
696	 */
697	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
698		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
699		    (ifp->if_u1.if_data != NULL)) {
700			ASSERT(ifp->if_real_bytes != 0);
701			kmem_free(ifp->if_u1.if_data);
702			ifp->if_u1.if_data = NULL;
703			ifp->if_real_bytes = 0;
704		}
705	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
706		   ((ifp->if_flags & XFS_IFEXTIREC) ||
707		    ((ifp->if_u1.if_extents != NULL) &&
708		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
709		ASSERT(ifp->if_real_bytes != 0);
710		xfs_iext_destroy(ifp);
711	}
712	ASSERT(ifp->if_u1.if_extents == NULL ||
713	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
714	ASSERT(ifp->if_real_bytes == 0);
715	if (whichfork == XFS_ATTR_FORK) {
716		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
717		ip->i_afp = NULL;
718	}
719}
720
721/*
722 * Convert in-core extents to on-disk form
723 *
724 * For either the data or attr fork in extent format, we need to endian convert
725 * the in-core extent as we place them into the on-disk inode.
726 *
727 * In the case of the data fork, the in-core and on-disk fork sizes can be
728 * different due to delayed allocation extents. We only copy on-disk extents
729 * here, so callers must always use the physical fork size to determine the
730 * size of the buffer passed to this routine.  We will return the size actually
731 * used.
732 */
733int
734xfs_iextents_copy(
735	xfs_inode_t		*ip,
736	xfs_bmbt_rec_t		*dp,
737	int			whichfork)
738{
739	int			copied;
740	int			i;
741	xfs_ifork_t		*ifp;
742	int			nrecs;
743	xfs_fsblock_t		start_block;
744
745	ifp = XFS_IFORK_PTR(ip, whichfork);
746	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
747	ASSERT(ifp->if_bytes > 0);
748
749	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
750	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
751	ASSERT(nrecs > 0);
752
753	/*
754	 * There are some delayed allocation extents in the
755	 * inode, so copy the extents one at a time and skip
756	 * the delayed ones.  There must be at least one
757	 * non-delayed extent.
758	 */
759	copied = 0;
760	for (i = 0; i < nrecs; i++) {
761		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
762		start_block = xfs_bmbt_get_startblock(ep);
763		if (isnullstartblock(start_block)) {
764			/*
765			 * It's a delayed allocation extent, so skip it.
766			 */
767			continue;
768		}
769
770		/* Translate to on disk format */
771		put_unaligned_be64(ep->l0, &dp->l0);
772		put_unaligned_be64(ep->l1, &dp->l1);
773		dp++;
774		copied++;
775	}
776	ASSERT(copied != 0);
777	xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
778
779	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
780}
781
782/*
783 * Each of the following cases stores data into the same region
784 * of the on-disk inode, so only one of them can be valid at
785 * any given time. While it is possible to have conflicting formats
786 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
787 * in EXTENTS format, this can only happen when the fork has
788 * changed formats after being modified but before being flushed.
789 * In these cases, the format always takes precedence, because the
790 * format indicates the current state of the fork.
791 */
792void
793xfs_iflush_fork(
794	xfs_inode_t		*ip,
795	xfs_dinode_t		*dip,
796	xfs_inode_log_item_t	*iip,
797	int			whichfork)
798{
799	char			*cp;
800	xfs_ifork_t		*ifp;
801	xfs_mount_t		*mp;
802	static const short	brootflag[2] =
803		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
804	static const short	dataflag[2] =
805		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
806	static const short	extflag[2] =
807		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
808
809	if (!iip)
810		return;
811	ifp = XFS_IFORK_PTR(ip, whichfork);
812	/*
813	 * This can happen if we gave up in iformat in an error path,
814	 * for the attribute fork.
815	 */
816	if (!ifp) {
817		ASSERT(whichfork == XFS_ATTR_FORK);
818		return;
819	}
820	cp = XFS_DFORK_PTR(dip, whichfork);
821	mp = ip->i_mount;
822	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
823	case XFS_DINODE_FMT_LOCAL:
824		if ((iip->ili_fields & dataflag[whichfork]) &&
825		    (ifp->if_bytes > 0)) {
826			ASSERT(ifp->if_u1.if_data != NULL);
827			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
828			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
829		}
830		break;
831
832	case XFS_DINODE_FMT_EXTENTS:
833		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
834		       !(iip->ili_fields & extflag[whichfork]));
835		if ((iip->ili_fields & extflag[whichfork]) &&
836		    (ifp->if_bytes > 0)) {
837			ASSERT(xfs_iext_get_ext(ifp, 0));
838			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
839			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
840				whichfork);
841		}
842		break;
843
844	case XFS_DINODE_FMT_BTREE:
845		if ((iip->ili_fields & brootflag[whichfork]) &&
846		    (ifp->if_broot_bytes > 0)) {
847			ASSERT(ifp->if_broot != NULL);
848			ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
849			        XFS_IFORK_SIZE(ip, whichfork));
850			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
851				(xfs_bmdr_block_t *)cp,
852				XFS_DFORK_SIZE(dip, mp, whichfork));
853		}
854		break;
855
856	case XFS_DINODE_FMT_DEV:
857		if (iip->ili_fields & XFS_ILOG_DEV) {
858			ASSERT(whichfork == XFS_DATA_FORK);
859			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
860		}
861		break;
862
863	case XFS_DINODE_FMT_UUID:
864		if (iip->ili_fields & XFS_ILOG_UUID) {
865			ASSERT(whichfork == XFS_DATA_FORK);
866			memcpy(XFS_DFORK_DPTR(dip),
867			       &ip->i_df.if_u2.if_uuid,
868			       sizeof(uuid_t));
869		}
870		break;
871
872	default:
873		ASSERT(0);
874		break;
875	}
876}
877
878/*
879 * Return a pointer to the extent record at file index idx.
880 */
881xfs_bmbt_rec_host_t *
882xfs_iext_get_ext(
883	xfs_ifork_t	*ifp,		/* inode fork pointer */
884	xfs_extnum_t	idx)		/* index of target extent */
885{
886	ASSERT(idx >= 0);
887	ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
888
889	if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
890		return ifp->if_u1.if_ext_irec->er_extbuf;
891	} else if (ifp->if_flags & XFS_IFEXTIREC) {
892		xfs_ext_irec_t	*erp;		/* irec pointer */
893		int		erp_idx = 0;	/* irec index */
894		xfs_extnum_t	page_idx = idx;	/* ext index in target list */
895
896		erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
897		return &erp->er_extbuf[page_idx];
898	} else if (ifp->if_bytes) {
899		return &ifp->if_u1.if_extents[idx];
900	} else {
901		return NULL;
902	}
903}
904
905/*
906 * Insert new item(s) into the extent records for incore inode
907 * fork 'ifp'.  'count' new items are inserted at index 'idx'.
908 */
909void
910xfs_iext_insert(
911	xfs_inode_t	*ip,		/* incore inode pointer */
912	xfs_extnum_t	idx,		/* starting index of new items */
913	xfs_extnum_t	count,		/* number of inserted items */
914	xfs_bmbt_irec_t	*new,		/* items to insert */
915	int		state)		/* type of extent conversion */
916{
917	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
918	xfs_extnum_t	i;		/* extent record index */
919
920	trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
921
922	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
923	xfs_iext_add(ifp, idx, count);
924	for (i = idx; i < idx + count; i++, new++)
925		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
926}
927
928/*
929 * This is called when the amount of space required for incore file
930 * extents needs to be increased. The ext_diff parameter stores the
931 * number of new extents being added and the idx parameter contains
932 * the extent index where the new extents will be added. If the new
933 * extents are being appended, then we just need to (re)allocate and
934 * initialize the space. Otherwise, if the new extents are being
935 * inserted into the middle of the existing entries, a bit more work
936 * is required to make room for the new extents to be inserted. The
937 * caller is responsible for filling in the new extent entries upon
938 * return.
939 */
940void
941xfs_iext_add(
942	xfs_ifork_t	*ifp,		/* inode fork pointer */
943	xfs_extnum_t	idx,		/* index to begin adding exts */
944	int		ext_diff)	/* number of extents to add */
945{
946	int		byte_diff;	/* new bytes being added */
947	int		new_size;	/* size of extents after adding */
948	xfs_extnum_t	nextents;	/* number of extents in file */
949
950	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
951	ASSERT((idx >= 0) && (idx <= nextents));
952	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
953	new_size = ifp->if_bytes + byte_diff;
954	/*
955	 * If the new number of extents (nextents + ext_diff)
956	 * fits inside the inode, then continue to use the inline
957	 * extent buffer.
958	 */
959	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
960		if (idx < nextents) {
961			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
962				&ifp->if_u2.if_inline_ext[idx],
963				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
964			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
965		}
966		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
967		ifp->if_real_bytes = 0;
968	}
969	/*
970	 * Otherwise use a linear (direct) extent list.
971	 * If the extents are currently inside the inode,
972	 * xfs_iext_realloc_direct will switch us from
973	 * inline to direct extent allocation mode.
974	 */
975	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
976		xfs_iext_realloc_direct(ifp, new_size);
977		if (idx < nextents) {
978			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
979				&ifp->if_u1.if_extents[idx],
980				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
981			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
982		}
983	}
984	/* Indirection array */
985	else {
986		xfs_ext_irec_t	*erp;
987		int		erp_idx = 0;
988		int		page_idx = idx;
989
990		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
991		if (ifp->if_flags & XFS_IFEXTIREC) {
992			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
993		} else {
994			xfs_iext_irec_init(ifp);
995			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
996			erp = ifp->if_u1.if_ext_irec;
997		}
998		/* Extents fit in target extent page */
999		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
1000			if (page_idx < erp->er_extcount) {
1001				memmove(&erp->er_extbuf[page_idx + ext_diff],
1002					&erp->er_extbuf[page_idx],
1003					(erp->er_extcount - page_idx) *
1004					sizeof(xfs_bmbt_rec_t));
1005				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
1006			}
1007			erp->er_extcount += ext_diff;
1008			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1009		}
1010		/* Insert a new extent page */
1011		else if (erp) {
1012			xfs_iext_add_indirect_multi(ifp,
1013				erp_idx, page_idx, ext_diff);
1014		}
1015		/*
1016		 * If extent(s) are being appended to the last page in
1017		 * the indirection array and the new extent(s) don't fit
1018		 * in the page, then erp is NULL and erp_idx is set to
1019		 * the next index needed in the indirection array.
1020		 */
1021		else {
1022			uint	count = ext_diff;
1023
1024			while (count) {
1025				erp = xfs_iext_irec_new(ifp, erp_idx);
1026				erp->er_extcount = min(count, XFS_LINEAR_EXTS);
1027				count -= erp->er_extcount;
1028				if (count)
1029					erp_idx++;
1030			}
1031		}
1032	}
1033	ifp->if_bytes = new_size;
1034}
1035
1036/*
1037 * This is called when incore extents are being added to the indirection
1038 * array and the new extents do not fit in the target extent list. The
1039 * erp_idx parameter contains the irec index for the target extent list
1040 * in the indirection array, and the idx parameter contains the extent
1041 * index within the list. The number of extents being added is stored
1042 * in the count parameter.
1043 *
1044 *    |-------|   |-------|
1045 *    |       |   |       |    idx - number of extents before idx
1046 *    |  idx  |   | count |
1047 *    |       |   |       |    count - number of extents being inserted at idx
1048 *    |-------|   |-------|
1049 *    | count |   | nex2  |    nex2 - number of extents after idx + count
1050 *    |-------|   |-------|
1051 */
1052void
1053xfs_iext_add_indirect_multi(
1054	xfs_ifork_t	*ifp,			/* inode fork pointer */
1055	int		erp_idx,		/* target extent irec index */
1056	xfs_extnum_t	idx,			/* index within target list */
1057	int		count)			/* new extents being added */
1058{
1059	int		byte_diff;		/* new bytes being added */
1060	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
1061	xfs_extnum_t	ext_diff;		/* number of extents to add */
1062	xfs_extnum_t	ext_cnt;		/* new extents still needed */
1063	xfs_extnum_t	nex2;			/* extents after idx + count */
1064	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
1065	int		nlists;			/* number of irec's (lists) */
1066
1067	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1068	erp = &ifp->if_u1.if_ext_irec[erp_idx];
1069	nex2 = erp->er_extcount - idx;
1070	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1071
1072	/*
1073	 * Save second part of target extent list
1074	 * (all extents past */
1075	if (nex2) {
1076		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1077		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
1078		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
1079		erp->er_extcount -= nex2;
1080		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
1081		memset(&erp->er_extbuf[idx], 0, byte_diff);
1082	}
1083
1084	/*
1085	 * Add the new extents to the end of the target
1086	 * list, then allocate new irec record(s) and
1087	 * extent buffer(s) as needed to store the rest
1088	 * of the new extents.
1089	 */
1090	ext_cnt = count;
1091	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
1092	if (ext_diff) {
1093		erp->er_extcount += ext_diff;
1094		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1095		ext_cnt -= ext_diff;
1096	}
1097	while (ext_cnt) {
1098		erp_idx++;
1099		erp = xfs_iext_irec_new(ifp, erp_idx);
1100		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
1101		erp->er_extcount = ext_diff;
1102		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1103		ext_cnt -= ext_diff;
1104	}
1105
1106	/* Add nex2 extents back to indirection array */
1107	if (nex2) {
1108		xfs_extnum_t	ext_avail;
1109		int		i;
1110
1111		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1112		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
1113		i = 0;
1114		/*
1115		 * If nex2 extents fit in the current page, append
1116		 * nex2_ep after the new extents.
1117		 */
1118		if (nex2 <= ext_avail) {
1119			i = erp->er_extcount;
1120		}
1121		/*
1122		 * Otherwise, check if space is available in the
1123		 * next page.
1124		 */
1125		else if ((erp_idx < nlists - 1) &&
1126			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
1127			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
1128			erp_idx++;
1129			erp++;
1130			/* Create a hole for nex2 extents */
1131			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
1132				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
1133		}
1134		/*
1135		 * Final choice, create a new extent page for
1136		 * nex2 extents.
1137		 */
1138		else {
1139			erp_idx++;
1140			erp = xfs_iext_irec_new(ifp, erp_idx);
1141		}
1142		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
1143		kmem_free(nex2_ep);
1144		erp->er_extcount += nex2;
1145		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
1146	}
1147}
1148
1149/*
1150 * This is called when the amount of space required for incore file
1151 * extents needs to be decreased. The ext_diff parameter stores the
1152 * number of extents to be removed and the idx parameter contains
1153 * the extent index where the extents will be removed from.
1154 *
1155 * If the amount of space needed has decreased below the linear
1156 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
1157 * extent array.  Otherwise, use kmem_realloc() to adjust the
1158 * size to what is needed.
1159 */
1160void
1161xfs_iext_remove(
1162	xfs_inode_t	*ip,		/* incore inode pointer */
1163	xfs_extnum_t	idx,		/* index to begin removing exts */
1164	int		ext_diff,	/* number of extents to remove */
1165	int		state)		/* type of extent conversion */
1166{
1167	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
1168	xfs_extnum_t	nextents;	/* number of extents in file */
1169	int		new_size;	/* size of extents after removal */
1170
1171	trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
1172
1173	ASSERT(ext_diff > 0);
1174	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1175	new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
1176
1177	if (new_size == 0) {
1178		xfs_iext_destroy(ifp);
1179	} else if (ifp->if_flags & XFS_IFEXTIREC) {
1180		xfs_iext_remove_indirect(ifp, idx, ext_diff);
1181	} else if (ifp->if_real_bytes) {
1182		xfs_iext_remove_direct(ifp, idx, ext_diff);
1183	} else {
1184		xfs_iext_remove_inline(ifp, idx, ext_diff);
1185	}
1186	ifp->if_bytes = new_size;
1187}
1188
1189/*
1190 * This removes ext_diff extents from the inline buffer, beginning
1191 * at extent index idx.
1192 */
1193void
1194xfs_iext_remove_inline(
1195	xfs_ifork_t	*ifp,		/* inode fork pointer */
1196	xfs_extnum_t	idx,		/* index to begin removing exts */
1197	int		ext_diff)	/* number of extents to remove */
1198{
1199	int		nextents;	/* number of extents in file */
1200
1201	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1202	ASSERT(idx < XFS_INLINE_EXTS);
1203	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1204	ASSERT(((nextents - ext_diff) > 0) &&
1205		(nextents - ext_diff) < XFS_INLINE_EXTS);
1206
1207	if (idx + ext_diff < nextents) {
1208		memmove(&ifp->if_u2.if_inline_ext[idx],
1209			&ifp->if_u2.if_inline_ext[idx + ext_diff],
1210			(nextents - (idx + ext_diff)) *
1211			 sizeof(xfs_bmbt_rec_t));
1212		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
1213			0, ext_diff * sizeof(xfs_bmbt_rec_t));
1214	} else {
1215		memset(&ifp->if_u2.if_inline_ext[idx], 0,
1216			ext_diff * sizeof(xfs_bmbt_rec_t));
1217	}
1218}
1219
1220/*
1221 * This removes ext_diff extents from a linear (direct) extent list,
1222 * beginning at extent index idx. If the extents are being removed
1223 * from the end of the list (ie. truncate) then we just need to re-
1224 * allocate the list to remove the extra space. Otherwise, if the
1225 * extents are being removed from the middle of the existing extent
1226 * entries, then we first need to move the extent records beginning
1227 * at idx + ext_diff up in the list to overwrite the records being
1228 * removed, then remove the extra space via kmem_realloc.
1229 */
1230void
1231xfs_iext_remove_direct(
1232	xfs_ifork_t	*ifp,		/* inode fork pointer */
1233	xfs_extnum_t	idx,		/* index to begin removing exts */
1234	int		ext_diff)	/* number of extents to remove */
1235{
1236	xfs_extnum_t	nextents;	/* number of extents in file */
1237	int		new_size;	/* size of extents after removal */
1238
1239	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1240	new_size = ifp->if_bytes -
1241		(ext_diff * sizeof(xfs_bmbt_rec_t));
1242	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1243
1244	if (new_size == 0) {
1245		xfs_iext_destroy(ifp);
1246		return;
1247	}
1248	/* Move extents up in the list (if needed) */
1249	if (idx + ext_diff < nextents) {
1250		memmove(&ifp->if_u1.if_extents[idx],
1251			&ifp->if_u1.if_extents[idx + ext_diff],
1252			(nextents - (idx + ext_diff)) *
1253			 sizeof(xfs_bmbt_rec_t));
1254	}
1255	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
1256		0, ext_diff * sizeof(xfs_bmbt_rec_t));
1257	/*
1258	 * Reallocate the direct extent list. If the extents
1259	 * will fit inside the inode then xfs_iext_realloc_direct
1260	 * will switch from direct to inline extent allocation
1261	 * mode for us.
1262	 */
1263	xfs_iext_realloc_direct(ifp, new_size);
1264	ifp->if_bytes = new_size;
1265}
1266
1267/*
1268 * This is called when incore extents are being removed from the
1269 * indirection array and the extents being removed span multiple extent
1270 * buffers. The idx parameter contains the file extent index where we
1271 * want to begin removing extents, and the count parameter contains
1272 * how many extents need to be removed.
1273 *
1274 *    |-------|   |-------|
1275 *    | nex1  |   |       |    nex1 - number of extents before idx
1276 *    |-------|   | count |
1277 *    |       |   |       |    count - number of extents being removed at idx
1278 *    | count |   |-------|
1279 *    |       |   | nex2  |    nex2 - number of extents after idx + count
1280 *    |-------|   |-------|
1281 */
1282void
1283xfs_iext_remove_indirect(
1284	xfs_ifork_t	*ifp,		/* inode fork pointer */
1285	xfs_extnum_t	idx,		/* index to begin removing extents */
1286	int		count)		/* number of extents to remove */
1287{
1288	xfs_ext_irec_t	*erp;		/* indirection array pointer */
1289	int		erp_idx = 0;	/* indirection array index */
1290	xfs_extnum_t	ext_cnt;	/* extents left to remove */
1291	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
1292	xfs_extnum_t	nex1;		/* number of extents before idx */
1293	xfs_extnum_t	nex2;		/* extents after idx + count */
1294	int		page_idx = idx;	/* index in target extent list */
1295
1296	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1297	erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
1298	ASSERT(erp != NULL);
1299	nex1 = page_idx;
1300	ext_cnt = count;
1301	while (ext_cnt) {
1302		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
1303		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
1304		/*
1305		 * Check for deletion of entire list;
1306		 * xfs_iext_irec_remove() updates extent offsets.
1307		 */
1308		if (ext_diff == erp->er_extcount) {
1309			xfs_iext_irec_remove(ifp, erp_idx);
1310			ext_cnt -= ext_diff;
1311			nex1 = 0;
1312			if (ext_cnt) {
1313				ASSERT(erp_idx < ifp->if_real_bytes /
1314					XFS_IEXT_BUFSZ);
1315				erp = &ifp->if_u1.if_ext_irec[erp_idx];
1316				nex1 = 0;
1317				continue;
1318			} else {
1319				break;
1320			}
1321		}
1322		/* Move extents up (if needed) */
1323		if (nex2) {
1324			memmove(&erp->er_extbuf[nex1],
1325				&erp->er_extbuf[nex1 + ext_diff],
1326				nex2 * sizeof(xfs_bmbt_rec_t));
1327		}
1328		/* Zero out rest of page */
1329		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
1330			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
1331		/* Update remaining counters */
1332		erp->er_extcount -= ext_diff;
1333		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
1334		ext_cnt -= ext_diff;
1335		nex1 = 0;
1336		erp_idx++;
1337		erp++;
1338	}
1339	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
1340	xfs_iext_irec_compact(ifp);
1341}
1342
1343/*
1344 * Create, destroy, or resize a linear (direct) block of extents.
1345 */
1346void
1347xfs_iext_realloc_direct(
1348	xfs_ifork_t	*ifp,		/* inode fork pointer */
1349	int		new_size)	/* new size of extents after adding */
1350{
1351	int		rnew_size;	/* real new size of extents */
1352
1353	rnew_size = new_size;
1354
1355	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
1356		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
1357		 (new_size != ifp->if_real_bytes)));
1358
1359	/* Free extent records */
1360	if (new_size == 0) {
1361		xfs_iext_destroy(ifp);
1362	}
1363	/* Resize direct extent list and zero any new bytes */
1364	else if (ifp->if_real_bytes) {
1365		/* Check if extents will fit inside the inode */
1366		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
1367			xfs_iext_direct_to_inline(ifp, new_size /
1368				(uint)sizeof(xfs_bmbt_rec_t));
1369			ifp->if_bytes = new_size;
1370			return;
1371		}
1372		if (!is_power_of_2(new_size)){
1373			rnew_size = roundup_pow_of_two(new_size);
1374		}
1375		if (rnew_size != ifp->if_real_bytes) {
1376			ifp->if_u1.if_extents =
1377				kmem_realloc(ifp->if_u1.if_extents,
1378						rnew_size,
1379						ifp->if_real_bytes, KM_NOFS);
1380		}
1381		if (rnew_size > ifp->if_real_bytes) {
1382			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
1383				(uint)sizeof(xfs_bmbt_rec_t)], 0,
1384				rnew_size - ifp->if_real_bytes);
1385		}
1386	}
1387	/* Switch from the inline extent buffer to a direct extent list */
1388	else {
1389		if (!is_power_of_2(new_size)) {
1390			rnew_size = roundup_pow_of_two(new_size);
1391		}
1392		xfs_iext_inline_to_direct(ifp, rnew_size);
1393	}
1394	ifp->if_real_bytes = rnew_size;
1395	ifp->if_bytes = new_size;
1396}
1397
1398/*
1399 * Switch from linear (direct) extent records to inline buffer.
1400 */
1401void
1402xfs_iext_direct_to_inline(
1403	xfs_ifork_t	*ifp,		/* inode fork pointer */
1404	xfs_extnum_t	nextents)	/* number of extents in file */
1405{
1406	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1407	ASSERT(nextents <= XFS_INLINE_EXTS);
1408	/*
1409	 * The inline buffer was zeroed when we switched
1410	 * from inline to direct extent allocation mode,
1411	 * so we don't need to clear it here.
1412	 */
1413	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
1414		nextents * sizeof(xfs_bmbt_rec_t));
1415	kmem_free(ifp->if_u1.if_extents);
1416	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
1417	ifp->if_real_bytes = 0;
1418}
1419
1420/*
1421 * Switch from inline buffer to linear (direct) extent records.
1422 * new_size should already be rounded up to the next power of 2
1423 * by the caller (when appropriate), so use new_size as it is.
1424 * However, since new_size may be rounded up, we can't update
1425 * if_bytes here. It is the caller's responsibility to update
1426 * if_bytes upon return.
1427 */
1428void
1429xfs_iext_inline_to_direct(
1430	xfs_ifork_t	*ifp,		/* inode fork pointer */
1431	int		new_size)	/* number of extents in file */
1432{
1433	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
1434	memset(ifp->if_u1.if_extents, 0, new_size);
1435	if (ifp->if_bytes) {
1436		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
1437			ifp->if_bytes);
1438		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1439			sizeof(xfs_bmbt_rec_t));
1440	}
1441	ifp->if_real_bytes = new_size;
1442}
1443
1444/*
1445 * Resize an extent indirection array to new_size bytes.
1446 */
1447STATIC void
1448xfs_iext_realloc_indirect(
1449	xfs_ifork_t	*ifp,		/* inode fork pointer */
1450	int		new_size)	/* new indirection array size */
1451{
1452	int		nlists;		/* number of irec's (ex lists) */
1453	int		size;		/* current indirection array size */
1454
1455	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1456	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1457	size = nlists * sizeof(xfs_ext_irec_t);
1458	ASSERT(ifp->if_real_bytes);
1459	ASSERT((new_size >= 0) && (new_size != size));
1460	if (new_size == 0) {
1461		xfs_iext_destroy(ifp);
1462	} else {
1463		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
1464			kmem_realloc(ifp->if_u1.if_ext_irec,
1465				new_size, size, KM_NOFS);
1466	}
1467}
1468
1469/*
1470 * Switch from indirection array to linear (direct) extent allocations.
1471 */
1472STATIC void
1473xfs_iext_indirect_to_direct(
1474	 xfs_ifork_t	*ifp)		/* inode fork pointer */
1475{
1476	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
1477	xfs_extnum_t	nextents;	/* number of extents in file */
1478	int		size;		/* size of file extents */
1479
1480	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1481	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1482	ASSERT(nextents <= XFS_LINEAR_EXTS);
1483	size = nextents * sizeof(xfs_bmbt_rec_t);
1484
1485	xfs_iext_irec_compact_pages(ifp);
1486	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
1487
1488	ep = ifp->if_u1.if_ext_irec->er_extbuf;
1489	kmem_free(ifp->if_u1.if_ext_irec);
1490	ifp->if_flags &= ~XFS_IFEXTIREC;
1491	ifp->if_u1.if_extents = ep;
1492	ifp->if_bytes = size;
1493	if (nextents < XFS_LINEAR_EXTS) {
1494		xfs_iext_realloc_direct(ifp, size);
1495	}
1496}
1497
1498/*
1499 * Free incore file extents.
1500 */
1501void
1502xfs_iext_destroy(
1503	xfs_ifork_t	*ifp)		/* inode fork pointer */
1504{
1505	if (ifp->if_flags & XFS_IFEXTIREC) {
1506		int	erp_idx;
1507		int	nlists;
1508
1509		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1510		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
1511			xfs_iext_irec_remove(ifp, erp_idx);
1512		}
1513		ifp->if_flags &= ~XFS_IFEXTIREC;
1514	} else if (ifp->if_real_bytes) {
1515		kmem_free(ifp->if_u1.if_extents);
1516	} else if (ifp->if_bytes) {
1517		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1518			sizeof(xfs_bmbt_rec_t));
1519	}
1520	ifp->if_u1.if_extents = NULL;
1521	ifp->if_real_bytes = 0;
1522	ifp->if_bytes = 0;
1523}
1524
1525/*
1526 * Return a pointer to the extent record for file system block bno.
1527 */
1528xfs_bmbt_rec_host_t *			/* pointer to found extent record */
1529xfs_iext_bno_to_ext(
1530	xfs_ifork_t	*ifp,		/* inode fork pointer */
1531	xfs_fileoff_t	bno,		/* block number to search for */
1532	xfs_extnum_t	*idxp)		/* index of target extent */
1533{
1534	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
1535	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
1536	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
1537	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
1538	int		high;		/* upper boundary in search */
1539	xfs_extnum_t	idx = 0;	/* index of target extent */
1540	int		low;		/* lower boundary in search */
1541	xfs_extnum_t	nextents;	/* number of file extents */
1542	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
1543
1544	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1545	if (nextents == 0) {
1546		*idxp = 0;
1547		return NULL;
1548	}
1549	low = 0;
1550	if (ifp->if_flags & XFS_IFEXTIREC) {
1551		/* Find target extent list */
1552		int	erp_idx = 0;
1553		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
1554		base = erp->er_extbuf;
1555		high = erp->er_extcount - 1;
1556	} else {
1557		base = ifp->if_u1.if_extents;
1558		high = nextents - 1;
1559	}
1560	/* Binary search extent records */
1561	while (low <= high) {
1562		idx = (low + high) >> 1;
1563		ep = base + idx;
1564		startoff = xfs_bmbt_get_startoff(ep);
1565		blockcount = xfs_bmbt_get_blockcount(ep);
1566		if (bno < startoff) {
1567			high = idx - 1;
1568		} else if (bno >= startoff + blockcount) {
1569			low = idx + 1;
1570		} else {
1571			/* Convert back to file-based extent index */
1572			if (ifp->if_flags & XFS_IFEXTIREC) {
1573				idx += erp->er_extoff;
1574			}
1575			*idxp = idx;
1576			return ep;
1577		}
1578	}
1579	/* Convert back to file-based extent index */
1580	if (ifp->if_flags & XFS_IFEXTIREC) {
1581		idx += erp->er_extoff;
1582	}
1583	if (bno >= startoff + blockcount) {
1584		if (++idx == nextents) {
1585			ep = NULL;
1586		} else {
1587			ep = xfs_iext_get_ext(ifp, idx);
1588		}
1589	}
1590	*idxp = idx;
1591	return ep;
1592}
1593
1594/*
1595 * Return a pointer to the indirection array entry containing the
1596 * extent record for filesystem block bno. Store the index of the
1597 * target irec in *erp_idxp.
1598 */
1599xfs_ext_irec_t *			/* pointer to found extent record */
1600xfs_iext_bno_to_irec(
1601	xfs_ifork_t	*ifp,		/* inode fork pointer */
1602	xfs_fileoff_t	bno,		/* block number to search for */
1603	int		*erp_idxp)	/* irec index of target ext list */
1604{
1605	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
1606	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
1607	int		erp_idx;	/* indirection array index */
1608	int		nlists;		/* number of extent irec's (lists) */
1609	int		high;		/* binary search upper limit */
1610	int		low;		/* binary search lower limit */
1611
1612	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1613	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1614	erp_idx = 0;
1615	low = 0;
1616	high = nlists - 1;
1617	while (low <= high) {
1618		erp_idx = (low + high) >> 1;
1619		erp = &ifp->if_u1.if_ext_irec[erp_idx];
1620		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
1621		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
1622			high = erp_idx - 1;
1623		} else if (erp_next && bno >=
1624			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
1625			low = erp_idx + 1;
1626		} else {
1627			break;
1628		}
1629	}
1630	*erp_idxp = erp_idx;
1631	return erp;
1632}
1633
1634/*
1635 * Return a pointer to the indirection array entry containing the
1636 * extent record at file extent index *idxp. Store the index of the
1637 * target irec in *erp_idxp and store the page index of the target
1638 * extent record in *idxp.
1639 */
1640xfs_ext_irec_t *
1641xfs_iext_idx_to_irec(
1642	xfs_ifork_t	*ifp,		/* inode fork pointer */
1643	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
1644	int		*erp_idxp,	/* pointer to target irec */
1645	int		realloc)	/* new bytes were just added */
1646{
1647	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
1648	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
1649	int		erp_idx;	/* indirection array index */
1650	int		nlists;		/* number of irec's (ex lists) */
1651	int		high;		/* binary search upper limit */
1652	int		low;		/* binary search lower limit */
1653	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */
1654
1655	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1656	ASSERT(page_idx >= 0);
1657	ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
1658	ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
1659
1660	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1661	erp_idx = 0;
1662	low = 0;
1663	high = nlists - 1;
1664
1665	/* Binary search extent irec's */
1666	while (low <= high) {
1667		erp_idx = (low + high) >> 1;
1668		erp = &ifp->if_u1.if_ext_irec[erp_idx];
1669		prev = erp_idx > 0 ? erp - 1 : NULL;
1670		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
1671		     realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
1672			high = erp_idx - 1;
1673		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
1674			   (page_idx == erp->er_extoff + erp->er_extcount &&
1675			    !realloc)) {
1676			low = erp_idx + 1;
1677		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
1678			   erp->er_extcount == XFS_LINEAR_EXTS) {
1679			ASSERT(realloc);
1680			page_idx = 0;
1681			erp_idx++;
1682			erp = erp_idx < nlists ? erp + 1 : NULL;
1683			break;
1684		} else {
1685			page_idx -= erp->er_extoff;
1686			break;
1687		}
1688	}
1689	*idxp = page_idx;
1690	*erp_idxp = erp_idx;
1691	return erp;
1692}
1693
1694/*
1695 * Allocate and initialize an indirection array once the space needed
1696 * for incore extents increases above XFS_IEXT_BUFSZ.
1697 */
1698void
1699xfs_iext_irec_init(
1700	xfs_ifork_t	*ifp)		/* inode fork pointer */
1701{
1702	xfs_ext_irec_t	*erp;		/* indirection array pointer */
1703	xfs_extnum_t	nextents;	/* number of extents in file */
1704
1705	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1706	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1707	ASSERT(nextents <= XFS_LINEAR_EXTS);
1708
1709	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
1710
1711	if (nextents == 0) {
1712		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1713	} else if (!ifp->if_real_bytes) {
1714		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
1715	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
1716		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
1717	}
1718	erp->er_extbuf = ifp->if_u1.if_extents;
1719	erp->er_extcount = nextents;
1720	erp->er_extoff = 0;
1721
1722	ifp->if_flags |= XFS_IFEXTIREC;
1723	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
1724	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
1725	ifp->if_u1.if_ext_irec = erp;
1726
1727	return;
1728}
1729
1730/*
1731 * Allocate and initialize a new entry in the indirection array.
1732 */
1733xfs_ext_irec_t *
1734xfs_iext_irec_new(
1735	xfs_ifork_t	*ifp,		/* inode fork pointer */
1736	int		erp_idx)	/* index for new irec */
1737{
1738	xfs_ext_irec_t	*erp;		/* indirection array pointer */
1739	int		i;		/* loop counter */
1740	int		nlists;		/* number of irec's (ex lists) */
1741
1742	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1743	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1744
1745	/* Resize indirection array */
1746	xfs_iext_realloc_indirect(ifp, ++nlists *
1747				  sizeof(xfs_ext_irec_t));
1748	/*
1749	 * Move records down in the array so the
1750	 * new page can use erp_idx.
1751	 */
1752	erp = ifp->if_u1.if_ext_irec;
1753	for (i = nlists - 1; i > erp_idx; i--) {
1754		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
1755	}
1756	ASSERT(i == erp_idx);
1757
1758	/* Initialize new extent record */
1759	erp = ifp->if_u1.if_ext_irec;
1760	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1761	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1762	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
1763	erp[erp_idx].er_extcount = 0;
1764	erp[erp_idx].er_extoff = erp_idx > 0 ?
1765		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
1766	return (&erp[erp_idx]);
1767}
1768
1769/*
1770 * Remove a record from the indirection array.
1771 */
1772void
1773xfs_iext_irec_remove(
1774	xfs_ifork_t	*ifp,		/* inode fork pointer */
1775	int		erp_idx)	/* irec index to remove */
1776{
1777	xfs_ext_irec_t	*erp;		/* indirection array pointer */
1778	int		i;		/* loop counter */
1779	int		nlists;		/* number of irec's (ex lists) */
1780
1781	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1782	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1783	erp = &ifp->if_u1.if_ext_irec[erp_idx];
1784	if (erp->er_extbuf) {
1785		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
1786			-erp->er_extcount);
1787		kmem_free(erp->er_extbuf);
1788	}
1789	/* Compact extent records */
1790	erp = ifp->if_u1.if_ext_irec;
1791	for (i = erp_idx; i < nlists - 1; i++) {
1792		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
1793	}
1794	/*
1795	 * Manually free the last extent record from the indirection
1796	 * array.  A call to xfs_iext_realloc_indirect() with a size
1797	 * of zero would result in a call to xfs_iext_destroy() which
1798	 * would in turn call this function again, creating a nasty
1799	 * infinite loop.
1800	 */
1801	if (--nlists) {
1802		xfs_iext_realloc_indirect(ifp,
1803			nlists * sizeof(xfs_ext_irec_t));
1804	} else {
1805		kmem_free(ifp->if_u1.if_ext_irec);
1806	}
1807	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1808}
1809
1810/*
1811 * This is called to clean up large amounts of unused memory allocated
1812 * by the indirection array.  Before compacting anything though, verify
1813 * that the indirection array is still needed and switch back to the
1814 * linear extent list (or even the inline buffer) if possible.  The
1815 * compaction policy is as follows:
1816 *
1817 *    Full Compaction: Extents fit into a single page (or inline buffer)
1818 * Partial Compaction: Extents occupy less than 50% of allocated space
1819 *      No Compaction: Extents occupy at least 50% of allocated space
1820 */
1821void
1822xfs_iext_irec_compact(
1823	xfs_ifork_t	*ifp)		/* inode fork pointer */
1824{
1825	xfs_extnum_t	nextents;	/* number of extents in file */
1826	int		nlists;		/* number of irec's (ex lists) */
1827
1828	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1829	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1830	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1831
1832	if (nextents == 0) {
1833		xfs_iext_destroy(ifp);
1834	} else if (nextents <= XFS_INLINE_EXTS) {
1835		xfs_iext_indirect_to_direct(ifp);
1836		xfs_iext_direct_to_inline(ifp, nextents);
1837	} else if (nextents <= XFS_LINEAR_EXTS) {
1838		xfs_iext_indirect_to_direct(ifp);
1839	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
1840		xfs_iext_irec_compact_pages(ifp);
1841	}
1842}
1843
1844/*
1845 * Combine extents from neighboring extent pages.
1846 */
1847void
1848xfs_iext_irec_compact_pages(
1849	xfs_ifork_t	*ifp)		/* inode fork pointer */
1850{
1851	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
1852	int		erp_idx = 0;	/* indirection array index */
1853	int		nlists;		/* number of irec's (ex lists) */
1854
1855	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1856	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1857	while (erp_idx < nlists - 1) {
1858		erp = &ifp->if_u1.if_ext_irec[erp_idx];
1859		erp_next = erp + 1;
1860		if (erp_next->er_extcount <=
1861		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
1862			memcpy(&erp->er_extbuf[erp->er_extcount],
1863				erp_next->er_extbuf, erp_next->er_extcount *
1864				sizeof(xfs_bmbt_rec_t));
1865			erp->er_extcount += erp_next->er_extcount;
1866			/*
1867			 * Free page before removing extent record
1868			 * so er_extoffs don't get modified in
1869			 * xfs_iext_irec_remove.
1870			 */
1871			kmem_free(erp_next->er_extbuf);
1872			erp_next->er_extbuf = NULL;
1873			xfs_iext_irec_remove(ifp, erp_idx + 1);
1874			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1875		} else {
1876			erp_idx++;
1877		}
1878	}
1879}
1880
1881/*
1882 * This is called to update the er_extoff field in the indirection
1883 * array when extents have been added or removed from one of the
1884 * extent lists. erp_idx contains the irec index to begin updating
1885 * at and ext_diff contains the number of extents that were added
1886 * or removed.
1887 */
1888void
1889xfs_iext_irec_update_extoffs(
1890	xfs_ifork_t	*ifp,		/* inode fork pointer */
1891	int		erp_idx,	/* irec index to update */
1892	int		ext_diff)	/* number of new extents */
1893{
1894	int		i;		/* loop counter */
1895	int		nlists;		/* number of irec's (ex lists */
1896
1897	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1898	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1899	for (i = erp_idx; i < nlists; i++) {
1900		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
1901	}
1902}
1903