/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_buf_item.h"
#include "xfs_trace.h"
#include "xfs_symlink.h"
#include "xfs_attr_leaf.h"
#include "xfs_filestream.h"


kmem_zone_t		*xfs_bmap_free_item_zone;

/*
 * Miscellaneous helper functions
 */

/*
 * Compute and fill in the value of the maximum depth of a bmap btree
 * in this filesystem.  Done once, during mount.
 */
void
xfs_bmap_compute_maxlevels(
	xfs_mount_t	*mp,		/* file system mount structure */
	int		whichfork)	/* data or attr fork */
{
	int		level;		/* btree level */
	uint		maxblocks;	/* max blocks at this level */
	uint		maxleafents;	/* max leaf entries possible */
	int		maxrootrecs;	/* max records in root block */
	int		minleafrecs;	/* min records in leaf block */
	int		minnoderecs;	/* min records in node block */
	int		sz;		/* root block size */

	/*
	 * The maximum number of extents in a file, hence the maximum
	 * number of leaf entries, is controlled by the type of di_nextents
	 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
	 * (a signed 16-bit number, xfs_aextnum_t).
	 *
	 * Note that we can no longer assume that if we are in ATTR1 that
	 * the fork offset of all the inodes will be
	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
	 * with ATTR2 and then mounted back with ATTR1, keeping the
	 * di_forkoff's fixed but probably at various positions. Therefore,
	 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
	 * of a minimum size available.
	 */
	if (whichfork == XFS_DATA_FORK) {
		maxleafents = MAXEXTNUM;
		sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
	} else {
		maxleafents = MAXAEXTNUM;
		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
	}
	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
	minleafrecs = mp->m_bmap_dmnr[0];
	minnoderecs = mp->m_bmap_dmnr[1];
	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
	for (level = 1; maxblocks > 1; level++) {
		if (maxblocks <= maxrootrecs)
			maxblocks = 1;
		else
			maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
	}
	mp->m_bm_maxlevels[whichfork] = level;
}

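/*
 * Look up the extent record exactly matching [off, bno, len] with the
 * given cursor; *stat is set to 1 if it is found, 0 otherwise.
 */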
STATIC int				/* error */
xfs_bmbt_lookup_eq(
	struct xfs_btree_cur	*cur,
	xfs_fileoff_t		off,
	xfs_fsblock_t		bno,
	xfs_filblks_t		len,
	int			*stat)	/* success/failure */
{
	cur->bc_rec.b.br_startoff = off;
	cur->bc_rec.b.br_startblock = bno;
	cur->bc_rec.b.br_blockcount = len;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

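/*
 * Look up the first extent record greater than or equal to
 * [off, bno, len] with the given cursor; *stat is set to 1 if such a
 * record is found, 0 otherwise.
 */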
STATIC int				/* error */
xfs_bmbt_lookup_ge(
	struct xfs_btree_cur	*cur,
	xfs_fileoff_t		off,
	xfs_fsblock_t		bno,
	xfs_filblks_t		len,
	int			*stat)	/* success/failure */
{
	cur->bc_rec.b.br_startoff = off;
	cur->bc_rec.b.br_startblock = bno;
	cur->bc_rec.b.br_blockcount = len;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Check if the inode needs to be converted to btree format.
 */
static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
{
	return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
		XFS_IFORK_NEXTENTS(ip, whichfork) >
			XFS_IFORK_MAXEXT(ip, whichfork);
}

/*
 * Check if the inode should be converted to extent format.
 */
static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
{
	return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
		XFS_IFORK_NEXTENTS(ip, whichfork) <=
			XFS_IFORK_MAXEXT(ip, whichfork);
}

/*
 * Update the record referred to by cur to the value given
 * by [off, bno, len, state].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_bmbt_update(
	struct xfs_btree_cur	*cur,
	xfs_fileoff_t		off,
	xfs_fsblock_t		bno,
	xfs_filblks_t		len,
	xfs_exntst_t		state)
{
	union xfs_btree_rec	rec;

	xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
	return xfs_btree_update(cur, &rec);
}

/*
 * Compute the worst-case number of indirect blocks that will be used
 * for ip's delayed extent of length "len".
 */
STATIC xfs_filblks_t
xfs_bmap_worst_indlen(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_filblks_t	len)		/* delayed extent length */
{
	int		level;		/* btree level number */
	int		maxrecs;	/* maximum record count at this level */
	xfs_mount_t	*mp;		/* mount structure */
	xfs_filblks_t	rval;		/* return value */

	mp = ip->i_mount;
	maxrecs = mp->m_bmap_dmxr[0];
	for (level = 0, rval = 0;
	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
	     level++) {
		len += maxrecs - 1;
		do_div(len, maxrecs);
		rval += len;
		if (len == 1)
			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
				level - 1;
		if (level == 0)
			maxrecs = mp->m_bmap_dmxr[1];
	}
	return rval;
}

/*
 * Calculate the default attribute fork offset for newly created inodes.
 */
uint
xfs_default_attroffset(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	uint			offset;

	if (mp->m_sb.sb_inodesize == 256) {
		offset = XFS_LITINO(mp, ip->i_d.di_version) -
				XFS_BMDR_SPACE_CALC(MINABTPTRS);
	} else {
		offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
	}

	ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
	return offset;
}

/*
 * Helper routine to reset inode di_forkoff field when switching
 * attribute fork from local to extent format - we reset it where
 * possible to make space available for inline data fork extents.
 */
STATIC void
xfs_bmap_forkoff_reset(
	xfs_inode_t	*ip,
	int		whichfork)
{
	if (whichfork == XFS_ATTR_FORK &&
	    ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
	    ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
	    ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;

		if (dfl_forkoff > ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = dfl_forkoff;
	}
}

#ifdef DEBUG
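/*
 * Find the buffer for the given btree block, first among the buffers
 * held by the cursor, then among the buffers logged in the cursor's
 * transaction; return NULL if it is in neither place.
 */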
STATIC struct xfs_buf *
xfs_bmap_get_bp(
	struct xfs_btree_cur	*cur,
	xfs_fsblock_t		bno)
{
	struct xfs_log_item_desc *lidp;
	int			i;

	if (!cur)
		return NULL;

	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
		if (!cur->bc_bufs[i])
			break;
		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
			return cur->bc_bufs[i];
	}

	/* Chase down all the log items to see if the bp is there */
	list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
		struct xfs_buf_log_item	*bip;
		bip = (struct xfs_buf_log_item *)lidp->lid_item;
		if (bip->bli_item.li_type == XFS_LI_BUF &&
		    XFS_BUF_ADDR(bip->bli_buf) == bno)
			return bip->bli_buf;
	}

	return NULL;
}

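/*
 * Sanity-check a single btree node or root block: the keys must be in
 * increasing startoff order and no two sibling pointers may reference
 * the same block.
 */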
STATIC void
xfs_check_block(
	struct xfs_btree_block	*block,
	xfs_mount_t		*mp,
	int			root,
	short			sz)
{
	int			i, j, dmxr;
	__be64			*pp, *thispa;	/* pointer to block address */
	xfs_bmbt_key_t		*prevp, *keyp;

	ASSERT(be16_to_cpu(block->bb_level) > 0);

	prevp = NULL;
	for (i = 1; i <= xfs_btree_get_numrecs(block); i++) {
		dmxr = mp->m_bmap_dmxr[0];
		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);

		if (prevp) {
			ASSERT(be64_to_cpu(prevp->br_startoff) <
			       be64_to_cpu(keyp->br_startoff));
		}
		prevp = keyp;

		/*
		 * Compare the block numbers to see if there are dups.
		 */
		if (root)
			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
		else
			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);

		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
			if (root)
				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
			else
				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
			if (*thispa == *pp) {
				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
					__func__, j, i,
					(unsigned long long)be64_to_cpu(*thispa));
				panic("%s: ptrs are equal in node\n",
					__func__);
			}
		}
	}
}

/*
 * Check that the extents for the inode ip are in the right order in all
 * btree leaves.
 */

STATIC void
xfs_bmap_check_leaf_extents(
	xfs_btree_cur_t		*cur,	/* btree cursor or null */
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_extnum_t		i=0, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
	int			bp_release = 0;

	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
		return;
	}

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		/* See if buf is in cur first */
		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;

		/*
		 * Check this block for basic sanity (increasing keys and
		 * no duplicate blocks).
		 */

		xfs_check_block(block, mp, 0, 0);
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(mp,
					XFS_FSB_SANITY_CHECK(mp, bno), error0);
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;

	/*
	 * Loop over all leaf nodes checking that all extents are in the right
	 * order.
	 */
	for (;;) {
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;


		num_recs = xfs_btree_get_numrecs(block);

		/*
		 * Read-ahead the next leaf block, if any.
		 */

		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);

		/*
		 * Check all the extents to make sure they are OK.
		 * If we had a previous block, the last entry should
		 * conform with the first entry in this one.
		 */

		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
		if (i) {
			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
			       xfs_bmbt_disk_get_blockcount(&last) <=
			       xfs_bmbt_disk_get_startoff(ep));
		}
		for (j = 1; j < num_recs; j++) {
			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
			       xfs_bmbt_disk_get_blockcount(ep) <=
			       xfs_bmbt_disk_get_startoff(nextp));
			ep = nextp;
		}

		last = *ep;
		i += num_recs;
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;

		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
	}
	if (bp_release) {
		bp_release = 0;
		xfs_trans_brelse(NULL, bp);
	}
	return;

error0:
	xfs_warn(mp, "%s: at error0", __func__);
	if (bp_release)
		xfs_trans_brelse(NULL, bp);
error_norelse:
	xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
		__func__, i);
	panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
	return;
}

/*
 * Add bmap trace insert entries for all the contents of the extent records.
 */
void
xfs_bmap_trace_exlist(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_extnum_t	cnt,		/* count of entries in the list */
	int		whichfork,	/* data or attr fork */
	unsigned long	caller_ip)
{
	xfs_extnum_t	idx;		/* extent record index */
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	int		state = 0;

	if (whichfork == XFS_ATTR_FORK)
		state |= BMAP_ATTRFORK;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
	for (idx = 0; idx < cnt; idx++)
		trace_xfs_extlist(ip, idx, whichfork, caller_ip);
}

/*
 * Validate that the bmbt_irecs being returned from bmapi are valid
 * given the caller's original parameters.  Specifically check the
 * ranges of the returned irecs to ensure that they only extend beyond
 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 */
STATIC void
xfs_bmap_validate_ret(
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	int			flags,
	xfs_bmbt_irec_t		*mval,
	int			nmap,
	int			ret_nmap)
{
	int			i;		/* index to map values */

	ASSERT(ret_nmap <= nmap);

	for (i = 0; i < ret_nmap; i++) {
		ASSERT(mval[i].br_blockcount > 0);
		if (!(flags & XFS_BMAPI_ENTIRE)) {
			ASSERT(mval[i].br_startoff >= bno);
			ASSERT(mval[i].br_blockcount <= len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
			       bno + len);
		} else {
			ASSERT(mval[i].br_startoff < bno + len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
			       bno);
		}
		ASSERT(i == 0 ||
		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
		       mval[i].br_startoff);
		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
		       mval[i].br_startblock != HOLESTARTBLOCK);
		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
		       mval[i].br_state == XFS_EXT_UNWRITTEN);
	}
}

#else
#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
#define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
#endif /* DEBUG */

/*
 * bmap free list manipulation functions
 */

/*
 * Add the extent to the list of extents to be free at transaction end.
 * The list is maintained sorted (by block number).
 */
void
xfs_bmap_add_free(
	xfs_fsblock_t		bno,		/* fs block number of extent */
	xfs_filblks_t		len,		/* length of extent */
	xfs_bmap_free_t		*flist,		/* list of extents */
	xfs_mount_t		*mp)		/* mount point structure */
{
	xfs_bmap_free_item_t	*cur;		/* current (next) element */
	xfs_bmap_free_item_t	*new;		/* new element */
	xfs_bmap_free_item_t	*prev;		/* previous element */
#ifdef DEBUG
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(len > 0);
	ASSERT(len <= MAXEXTLEN);
	ASSERT(!isnullstartblock(bno));
	agno = XFS_FSB_TO_AGNO(mp, bno);
	agbno = XFS_FSB_TO_AGBNO(mp, bno);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno < mp->m_sb.sb_agblocks);
	ASSERT(len < mp->m_sb.sb_agblocks);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
	ASSERT(xfs_bmap_free_item_zone != NULL);
	new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
	new->xbfi_startblock = bno;
	new->xbfi_blockcount = (xfs_extlen_t)len;
	for (prev = NULL, cur = flist->xbf_first;
	     cur != NULL;
	     prev = cur, cur = cur->xbfi_next) {
		if (cur->xbfi_startblock >= bno)
			break;
	}
	if (prev)
		prev->xbfi_next = new;
	else
		flist->xbf_first = new;
	new->xbfi_next = cur;
	flist->xbf_count++;
}

/*
 * Remove the entry "free" from the free item list.  Prev points to the
 * previous entry, unless "free" is the head of the list.
 */
void
xfs_bmap_del_free(
	xfs_bmap_free_t		*flist,	/* free item list header */
	xfs_bmap_free_item_t	*prev,	/* previous item on list, if any */
	xfs_bmap_free_item_t	*free)	/* list item to be freed */
{
	if (prev)
		prev->xbfi_next = free->xbfi_next;
	else
		flist->xbf_first = free->xbfi_next;
	flist->xbf_count--;
	kmem_zone_free(xfs_bmap_free_item_zone, free);
}

/*
 * Free up any items left in the list.
 */
void
xfs_bmap_cancel(
	xfs_bmap_free_t		*flist)	/* list of bmap_free_items */
{
	xfs_bmap_free_item_t	*free;	/* free list item */
	xfs_bmap_free_item_t	*next;

	if (flist->xbf_count == 0)
		return;
	ASSERT(flist->xbf_first != NULL);
	for (free = flist->xbf_first; free; free = next) {
		next = free->xbfi_next;
		xfs_bmap_del_free(flist, NULL, free);
	}
	ASSERT(flist->xbf_count == 0);
}

/*
 * Inode fork format manipulation functions
 */

/*
 * Transform a btree format file with only one leaf node, where the
 * extents list will fit in the inode, into an extents format file.
 * Since the file extents are already in-core, all we have to do is
 * give up the space for the btree root and pitch the leaf block.
 */
STATIC int				/* error */
xfs_bmap_btree_to_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			*logflagsp, /* inode logging flags */
	int			whichfork)  /* data or attr fork */
{
	/* REFERENCED */
	struct xfs_btree_block	*cblock;/* child btree block */
	xfs_fsblock_t		cbno;	/* child block number */
	xfs_buf_t		*cbp;	/* child block's buffer */
	int			error;	/* error return value */
	xfs_ifork_t		*ifp;	/* inode fork data */
	xfs_mount_t		*mp;	/* mount point structure */
	__be64			*pp;	/* ptr to block address */
	struct xfs_btree_block	*rblock;/* root btree block */

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
	rblock = ifp->if_broot;
	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
	cbno = be64_to_cpu(*pp);
	*logflagsp = 0;
#ifdef DEBUG
	if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
		return error;
#endif
	error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
				&xfs_bmbt_buf_ops);
	if (error)
		return error;
	cblock = XFS_BUF_TO_BLOCK(cbp);
	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
		return error;
	xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
	ip->i_d.di_nblocks--;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
	xfs_trans_binval(tp, cbp);
	if (cur->bc_bufs[0] == cbp)
		cur->bc_bufs[0] = NULL;
	xfs_iroot_realloc(ip, -1, whichfork);
	ASSERT(ifp->if_broot == NULL);
	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
	return 0;
}

/*
 * Convert an extents-format file into a btree-format file.
 * The new file will have a root block (in the inode) and a single child block.
 */
STATIC int					/* error */
xfs_bmap_extents_to_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first-block-allocated */
	xfs_bmap_free_t		*flist,		/* blocks freed in xaction */
	xfs_btree_cur_t		**curp,		/* cursor returned to caller */
	int			wasdel,		/* converting a delayed alloc */
	int			*logflagsp,	/* inode logging flags */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
	xfs_buf_t		*abp;		/* buffer for ablock */
	xfs_alloc_arg_t		args;		/* allocation arguments */
	xfs_bmbt_rec_t		*arp;		/* child record pointer */
	struct xfs_btree_block	*block;		/* btree root block */
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	xfs_bmbt_rec_host_t	*ep;		/* extent record pointer */
	int			error;		/* error return value */
	xfs_extnum_t		i, cnt;		/* extent record index */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	xfs_bmbt_key_t		*kp;		/* root block key pointer */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_extnum_t		nextents;	/* number of file extents */
	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);

	/*
	 * Make space in the inode incore.
	 */
	xfs_iroot_realloc(ip, 1, whichfork);
	ifp->if_flags |= XFS_IFBROOT;

	/*
	 * Fill in the root.
	 */
	block = ifp->if_broot;
	if (xfs_sb_version_hascrc(&mp->m_sb))
		xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
				 XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
				 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
	else
		xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
				 XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
				 XFS_BTREE_LONG_PTRS);

	/*
	 * Need a cursor.  Can't allocate until bb_level is filled in.
	 */
	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
	cur->bc_private.b.firstblock = *firstblock;
	cur->bc_private.b.flist = flist;
	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
	/*
	 * Convert to a btree with two levels, one record in root.
	 */
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = mp;
	args.firstblock = *firstblock;
	if (*firstblock == NULLFSBLOCK) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
	} else if (flist->xbf_low) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = *firstblock;
	} else {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.fsbno = *firstblock;
	}
	args.minlen = args.maxlen = args.prod = 1;
	args.wasdel = wasdel;
	*logflagsp = 0;
	if ((error = xfs_alloc_vextent(&args))) {
		xfs_iroot_realloc(ip, -1, whichfork);
		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
		return error;
	}
	/*
	 * Allocation can't fail, the space was reserved.
	 */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(*firstblock == NULLFSBLOCK ||
	       args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
	       (flist->xbf_low &&
		args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
	cur->bc_private.b.allocated++;
	ip->i_d.di_nblocks++;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
	/*
	 * Fill in the child block.
	 */
	abp->b_ops = &xfs_bmbt_buf_ops;
	ablock = XFS_BUF_TO_BLOCK(abp);
	if (xfs_sb_version_hascrc(&mp->m_sb))
		xfs_btree_init_block_int(mp, ablock, abp->b_bn,
				XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
				XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
	else
		xfs_btree_init_block_int(mp, ablock, abp->b_bn,
				XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
				XFS_BTREE_LONG_PTRS);

	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	for (cnt = i = 0; i < nextents; i++) {
		ep = xfs_iext_get_ext(ifp, i);
		if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
			arp->l0 = cpu_to_be64(ep->l0);
			arp->l1 = cpu_to_be64(ep->l1);
			arp++; cnt++;
		}
	}
	ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
	xfs_btree_set_numrecs(ablock, cnt);

	/*
	 * Fill in the root key and pointer.
	 */
	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
						be16_to_cpu(block->bb_level)));
	*pp = cpu_to_be64(args.fsbno);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
	ASSERT(*curp == NULL);
	*curp = cur;
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
	return 0;
}

/*
 * Convert a local file to an extents file.
 * This code is out of bounds for data forks of regular files,
 * since the file data needs to get logged so things will stay consistent.
 * (The bmap-level manipulations are ok, though).
 */
void
xfs_bmap_local_to_extents_empty(
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);

	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
	ASSERT(ifp->if_bytes == 0);
	ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);

	xfs_bmap_forkoff_reset(ip, whichfork);
	ifp->if_flags &= ~XFS_IFINLINE;
	ifp->if_flags |= XFS_IFEXTENTS;
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
}

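/*
 * Convert a non-empty local format fork to extents format, allocating
 * the single block that holds the inline data and using the caller's
 * init_fn callout to initialise it and copy the data over.
 */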
STATIC int				/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork,
	void		(*init_fn)(struct xfs_trans *tp,
				   struct xfs_buf *bp,
				   struct xfs_inode *ip,
				   struct xfs_ifork *ifp))
{
	int		error = 0;
	int		flags;		/* logging flags returned */
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_alloc_arg_t	args;		/* allocation arguments */
	xfs_buf_t	*bp;		/* buffer for extent block */
	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */

	/*
	 * We don't want to deal with the case of keeping inode data inline yet.
	 * So sending the data fork of a regular inode is invalid.
	 */
	ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);

	if (!ifp->if_bytes) {
		xfs_bmap_local_to_extents_empty(ip, whichfork);
		flags = XFS_ILOG_CORE;
		goto done;
	}

	flags = 0;
	error = 0;
	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) ==
								XFS_IFINLINE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = ip->i_mount;
	args.firstblock = *firstblock;
	/*
	 * Allocate a block.  We know we need only one, since the
	 * file currently fits in an inode.
	 */
	if (*firstblock == NULLFSBLOCK) {
		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
		args.type = XFS_ALLOCTYPE_START_BNO;
	} else {
		args.fsbno = *firstblock;
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
	}
	args.total = total;
	args.minlen = args.maxlen = args.prod = 1;
	error = xfs_alloc_vextent(&args);
	if (error)
		goto done;

	/* Can't fail, the space was reserved. */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(args.len == 1);
	*firstblock = args.fsbno;
	bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);

	/*
	 * Initialise the block and copy the data
	 *
	 * Note: init_fn must set the buffer log item type correctly!
	 */
	init_fn(tp, bp, ip, ifp);

	/* account for the change in fork size and log everything */
	xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
	xfs_bmap_local_to_extents_empty(ip, whichfork);
	flags |= XFS_ILOG_CORE;

	xfs_iext_add(ifp, 0, 1);
	ep = xfs_iext_get_ext(ifp, 0);
	xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
	trace_xfs_bmap_post_update(ip, 0,
			whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
			_THIS_IP_);
	XFS_IFORK_NEXT_SET(ip, whichfork, 1);
	ip->i_d.di_nblocks = 1;
	xfs_trans_mod_dquot_byino(tp, ip,
		XFS_TRANS_DQ_BCOUNT, 1L);
	flags |= xfs_ilog_fext(whichfork);

done:
	*logflagsp = flags;
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle btree format files.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first block allocated */
	xfs_bmap_free_t		*flist,		/* blocks to free at commit */
	int			*flags)		/* inode logging flags */
{
	xfs_btree_cur_t		*cur;		/* btree cursor */
	int			error;		/* error return value */
	xfs_mount_t		*mp;		/* file system mount struct */
	int			stat;		/* newroot status */

	mp = ip->i_mount;
	if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
		*flags |= XFS_ILOG_DBROOT;
	else {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
		cur->bc_private.b.flist = flist;
		cur->bc_private.b.firstblock = *firstblock;
		if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
			goto error0;
		/* must be at least one entry */
		XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
			goto error0;
		if (stat == 0) {
			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
			return -ENOSPC;
		}
		*firstblock = cur->bc_private.b.firstblock;
		cur->bc_private.b.allocated = 0;
		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	}
	return 0;
error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle extents format files.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_extents(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first block allocated */
	xfs_bmap_free_t		*flist,		/* blocks to free at commit */
	int			*flags)		/* inode logging flags */
{
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	int			error;		/* error return value */

	if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
		return 0;
	cur = NULL;
	error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0,
		flags, XFS_DATA_FORK);
	if (cur) {
		cur->bc_private.b.allocated = 0;
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle local format files. Each
 * different data fork content type needs a different callout to do the
 * conversion. Some are basic and only require special block initialisation
 * callouts for the data formatting, others (directories) are so specialised
 * they handle everything themselves.
 *
 * XXX (dgc): investigate whether directory conversion can use the generic
 * formatting callout. It should be possible - it's just a very complex
 * formatter.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_local(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first block allocated */
	xfs_bmap_free_t		*flist,		/* blocks to free at commit */
	int			*flags)		/* inode logging flags */
{
	xfs_da_args_t		dargs;		/* args for dir/attr code */

	if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
		return 0;

	if (S_ISDIR(ip->i_d.di_mode)) {
		memset(&dargs, 0, sizeof(dargs));
		dargs.geo = ip->i_mount->m_dir_geo;
		dargs.dp = ip;
		dargs.firstblock = firstblock;
		dargs.flist = flist;
		dargs.total = dargs.geo->fsbcount;
		dargs.whichfork = XFS_DATA_FORK;
		dargs.trans = tp;
		return xfs_dir2_sf_to_block(&dargs);
	}

	if (S_ISLNK(ip->i_d.di_mode))
		return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
						 flags, XFS_DATA_FORK,
						 xfs_symlink_local_to_remote);

	/* should only be called for types that support local format data */
	ASSERT(0);
	return -EFSCORRUPTED;
}

/*
 * Convert inode from non-attributed to attributed.
 * Must not be in a transaction, ip must not be locked.
 */
int						/* error code */
xfs_bmap_add_attrfork(
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
	xfs_bmap_free_t		flist;		/* freed extent records */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_trans_t		*tp;		/* transaction pointer */
	int			blks;		/* space reservation */
	int			version = 1;	/* superblock attr version */
	int			committed;	/* xaction was committed */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */
	int			cancel_flags = 0;

	ASSERT(XFS_IFORK_Q(ip) == 0);

	mp = ip->i_mount;
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
	tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
	blks = XFS_ADDAFORK_SPACE_RES(mp);
	if (rsvd)
		tp->t_flags |= XFS_TRANS_RESERVE;
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
			XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto trans_cancel;
	cancel_flags |= XFS_TRANS_ABORT;
	if (XFS_IFORK_Q(ip))
		goto trans_cancel;
	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
		/*
		 * For inodes coming from pre-6.2 filesystems.
		 */
		ASSERT(ip->i_d.di_aformat == 0);
		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	}
	ASSERT(ip->i_d.di_anextents == 0);

	xfs_trans_ijoin(tp, ip, 0);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_UUID:
		ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
		else if (mp->m_flags & XFS_MOUNT_ATTR2)
			version = 2;
		break;
	default:
		ASSERT(0);
		error = -EINVAL;
		goto trans_cancel;
	}

	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_flags = XFS_IFEXTENTS;
	logflags = 0;
	xfs_bmap_init(&flist, &firstblock);
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
			&flist, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
			&logflags);
		break;
	default:
		error = 0;
		break;
	}
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto bmap_cancel;
	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
		bool log_sb = false;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
			xfs_sb_version_addattr(&mp->m_sb);
			log_sb = true;
		}
		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
			xfs_sb_version_addattr2(&mp->m_sb);
			log_sb = true;
		}
		spin_unlock(&mp->m_sb_lock);
		if (log_sb)
			xfs_log_sb(tp);
	}

	error = xfs_bmap_finish(&tp, &flist, &committed);
	if (error)
		goto bmap_cancel;
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

bmap_cancel:
	xfs_bmap_cancel(&flist);
trans_cancel:
	xfs_trans_cancel(tp, cancel_flags);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * Internal and external extent tree search functions.
 */

/*
 * Read in the extents to if_extents.
 * All inode fields are set up by caller, we just traverse the btree
 * and copy the records in. If the file system cannot contain unwritten
 * extents, the records are checked for no "state" flags.
 */
int					/* error */
xfs_bmap_read_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode */
	int			whichfork) /* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_exntfmt_t		exntf;	/* XFS_EXTFMT_NOSTATE, if checking */
	xfs_extnum_t		i, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	/* REFERENCED */
	xfs_extnum_t		room;	/* number of entries there's room for */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
					XFS_EXTFMT_INODE(ip);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(mp,
			XFS_FSB_SANITY_CHECK(mp, bno), error0);
		xfs_trans_brelse(tp, bp);
	}
	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	i = 0;
	/*
	 * Loop over all leaf nodes.  Copy information to the extent records.
	 */
	for (;;) {
		xfs_bmbt_rec_t	*frp;
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;
		xfs_extnum_t	start;

		num_recs = xfs_btree_get_numrecs(block);
		if (unlikely(i + num_recs > room)) {
			ASSERT(i + num_recs <= room);
			xfs_warn(ip->i_mount,
				"corrupt dinode %Lu, (btree extents).",
				(unsigned long long) ip->i_ino);
			XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
				XFS_ERRLEVEL_LOW, ip->i_mount, block);
			goto error0;
		}
		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		if (nextbno != NULLFSBLOCK)
			xfs_btree_reada_bufl(mp, nextbno, 1,
					     &xfs_bmbt_buf_ops);
		/*
		 * Copy records into the extent records.
		 */
		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
		start = i;
		for (j = 0; j < num_recs; j++, i++, frp++) {
			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
			trp->l0 = be64_to_cpu(frp->l0);
			trp->l1 = be64_to_cpu(frp->l1);
		}
		if (exntf == XFS_EXTFMT_NOSTATE) {
			/*
			 * Check all attribute bmap btree records and
			 * any "older" data bmap btree records for a
			 * set bit in the "extent flag" position.
			 */
			if (unlikely(xfs_check_nostate_extents(ifp,
					start, num_recs))) {
				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
						 XFS_ERRLEVEL_LOW,
						 ip->i_mount);
				goto error0;
			}
		}
		xfs_trans_brelse(tp, bp);
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
	}
	ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
	XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
	return 0;
error0:
	xfs_trans_brelse(tp, bp);
	return -EFSCORRUPTED;
}

/*
 * Search the extent records for the entry containing block bno.
 * If bno lies in a hole, point to the next entry.  If bno lies
 * past eof, *eofp will be set, and *prevp will contain the last
 * entry (null if none).  Else, *lastxp will be set to the index
 * of the found entry; *gotp will contain the entry.
 */
STATIC xfs_bmbt_rec_host_t *		/* pointer to found extent entry */
xfs_bmap_search_multi_extents(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno,		/* block number searched for */
	int		*eofp,		/* out: end of file found */
	xfs_extnum_t	*lastxp,	/* out: last extent index */
	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
	xfs_bmbt_irec_t	*prevp)		/* out: previous extent entry found */
{
	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
	xfs_extnum_t	lastx;		/* last extent index */

	/*
	 * Initialize the extent entry structure to catch access to
	 * uninitialized br_startblock field.
	 */
	gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
	gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
	gotp->br_state = XFS_EXT_INVALID;
	gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
	prevp->br_startoff = NULLFILEOFF;

	ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
	if (lastx > 0) {
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
	}
	if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
		xfs_bmbt_get_all(ep, gotp);
		*eofp = 0;
	} else {
		if (lastx > 0) {
			*gotp = *prevp;
		}
		*eofp = 1;
		ep = NULL;
	}
	*lastxp = lastx;
	return ep;
}

/*
 * Search the extents list for the inode, for the extent containing bno.
 * If bno lies in a hole, point to the next entry.  If bno lies past eof,
 * *eofp will be set, and *prevp will contain the last entry (null if none).
 * Else, *lastxp will be set to the index of the found
 * entry; *gotp will contain the entry.
 */
STATIC xfs_bmbt_rec_host_t *		/* pointer to found extent entry */
xfs_bmap_search_extents(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fileoff_t	bno,		/* block number searched for */
	int		fork,		/* data or attr fork */
	int		*eofp,		/* out: end of file found */
	xfs_extnum_t	*lastxp,	/* out: last extent index */
	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
	xfs_bmbt_irec_t	*prevp)		/* out: previous extent entry found */
{
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */

	XFS_STATS_INC(xs_look_exlist);
	ifp = XFS_IFORK_PTR(ip, fork);

	ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);

	if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
		     !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
		xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
				"Access to block zero in inode %llu "
				"start_block: %llx start_off: %llx "
				"blkcnt: %llx extent-state: %x lastx: %x",
			(unsigned long long)ip->i_ino,
			(unsigned long long)gotp->br_startblock,
			(unsigned long long)gotp->br_startoff,
			(unsigned long long)gotp->br_blockcount,
			gotp->br_state, *lastxp);
		*lastxp = NULLEXTNUM;
		*eofp = 1;
		return NULL;
	}
	return ep;
}

/*
 * Returns the file-relative block number of the first unused block(s)
 * in the file with at least "len" logically contiguous blocks free.
 * This is the lowest-address hole if the file has holes, else the first block
 * past the end of file.
 * Return 0 if the file is currently local (in-inode).
 */
int						/* error */
xfs_bmap_first_unused(
	xfs_trans_t	*tp,			/* transaction pointer */
	xfs_inode_t	*ip,			/* incore inode */
	xfs_extlen_t	len,			/* size of hole to find */
	xfs_fileoff_t	*first_unused,		/* unused block */
	int		whichfork)		/* data or attr fork */
{
	int		error;			/* error return value */
	int		idx;			/* extent record index */
	xfs_ifork_t	*ifp;			/* inode fork pointer */
	xfs_fileoff_t	lastaddr;		/* last block number seen */
	xfs_fileoff_t	lowest;			/* lowest useful block */
	xfs_fileoff_t	max;			/* starting useful block */
	xfs_fileoff_t	off;			/* offset for this block */
	xfs_extnum_t	nextents;		/* number of extent entries */

	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		*first_unused = 0;
		return 0;
	}
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		return error;
	lowest = *first_unused;
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
		off = xfs_bmbt_get_startoff(ep);
		/*
		 * See if the hole before this extent will work.
		 */
		if (off >= lowest + len && off - max >= len) {
			*first_unused = max;
			return 0;
		}
		lastaddr = off + xfs_bmbt_get_blockcount(ep);
		max = XFS_FILEOFF_MAX(lastaddr, lowest);
	}
	*first_unused = max;
	return 0;
}

/*
 * Returns the file-relative block number of the last block - 1 before
 * last_block (input value) in the file.
 * This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int						/* error */
xfs_bmap_last_before(
	xfs_trans_t	*tp,			/* transaction pointer */
	xfs_inode_t	*ip,			/* incore inode */
	xfs_fileoff_t	*last_block,		/* last block */
	int		whichfork)		/* data or attr fork */
{
	xfs_fileoff_t	bno;			/* input file offset */
	int		eof;			/* hit end of file */
	xfs_bmbt_rec_host_t *ep;		/* pointer to last extent */
	int		error;			/* error return value */
	xfs_bmbt_irec_t	got;			/* current extent value */
	xfs_ifork_t	*ifp;			/* inode fork pointer */
	xfs_extnum_t	lastx;			/* last extent used */
	xfs_bmbt_irec_t	prev;			/* previous extent value */

	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
	       return -EIO;
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		*last_block = 0;
		return 0;
	}
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		return error;
	bno = *last_block - 1;
	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
		&prev);
	if (eof || xfs_bmbt_get_startoff(ep) > bno) {
		if (prev.br_startoff == NULLFILEOFF)
			*last_block = 0;
		else
			*last_block = prev.br_startoff + prev.br_blockcount;
	}
	/*
	 * Otherwise *last_block is already the right answer.
	 */
	return 0;
}

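/*
 * Return the last extent record of the given fork in *rec, or set
 * *is_empty if the fork contains no extent records at all.
 */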
int
xfs_bmap_last_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*rec,
	int			*is_empty)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	int			error;
	int			nextents;

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
	if (nextents == 0) {
		*is_empty = 1;
		return 0;
	}

	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
	*is_empty = 0;
	return 0;
}

/*
 * Check the last inode extent to determine whether this allocation will result
 * in blocks being allocated at the end of the file. When we allocate new data
 * blocks at the end of the file which do not start at the previous data block,
 * we will try to align the new blocks at stripe unit boundaries.
 *
 * Returns 1 in bma->aeof if the file (fork) is empty, as any new write will be
 * at or past the EOF.
 */
STATIC int
xfs_bmap_isaeof(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_bmbt_irec	rec;
	int			is_empty;
	int			error;

	bma->aeof = 0;
	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
				     &is_empty);
	if (error)
		return error;

	if (is_empty) {
		bma->aeof = 1;
		return 0;
	}

	/*
	 * Check if we are allocating at or past the last extent, or at least
	 * into the last delayed allocated extent.
	 */
1629	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1630		(bma->offset >= rec.br_startoff &&
1631		 isnullstartblock(rec.br_startblock));
1632	return 0;
1633}
1634
1635/*
1636 * Returns the file-relative block number of the first block past eof in
1637 * the file.  This is not based on i_size, it is based on the extent records.
1638 * Returns 0 for local files, as they do not have extent records.
1639 */
1640int
1641xfs_bmap_last_offset(
1642	struct xfs_inode	*ip,
1643	xfs_fileoff_t		*last_block,
1644	int			whichfork)
1645{
1646	struct xfs_bmbt_irec	rec;
1647	int			is_empty;
1648	int			error;
1649
1650	*last_block = 0;
1651
1652	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
1653		return 0;
1654
1655	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1656	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1657	       return -EIO;
1658
1659	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1660	if (error || is_empty)
1661		return error;
1662
1663	*last_block = rec.br_startoff + rec.br_blockcount;
1664	return 0;
1665}
1666
1667/*
1668 * Returns whether the selected fork of the inode has exactly one
1669 * block or not.  For the data fork we check this matches di_size,
1670 * implying the file's range is 0..bsize-1.
1671 */
1672int					/* 1=>1 block, 0=>otherwise */
1673xfs_bmap_one_block(
1674	xfs_inode_t	*ip,		/* incore inode */
1675	int		whichfork)	/* data or attr fork */
1676{
1677	xfs_bmbt_rec_host_t *ep;	/* ptr to fork's extent */
1678	xfs_ifork_t	*ifp;		/* inode fork pointer */
1679	int		rval;		/* return value */
1680	xfs_bmbt_irec_t	s;		/* internal version of extent */
1681
1682#ifndef DEBUG
1683	if (whichfork == XFS_DATA_FORK)
1684		return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
1685#endif	/* !DEBUG */
1686	if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
1687		return 0;
1688	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1689		return 0;
1690	ifp = XFS_IFORK_PTR(ip, whichfork);
1691	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1692	ep = xfs_iext_get_ext(ifp, 0);
1693	xfs_bmbt_get_all(ep, &s);
1694	rval = s.br_startoff == 0 && s.br_blockcount == 1;
1695	if (rval && whichfork == XFS_DATA_FORK)
1696		ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
1697	return rval;
1698}
1699
1700/*
1701 * Extent tree manipulation functions used during allocation.
1702 */
1703
1704/*
1705 * Convert a delayed allocation to a real allocation.
1706 */
1707STATIC int				/* error */
1708xfs_bmap_add_extent_delay_real(
1709	struct xfs_bmalloca	*bma)
1710{
1711	struct xfs_bmbt_irec	*new = &bma->got;
1712	int			diff;	/* temp value */
1713	xfs_bmbt_rec_host_t	*ep;	/* extent entry for idx */
1714	int			error;	/* error return value */
1715	int			i;	/* temp state */
1716	xfs_ifork_t		*ifp;	/* inode fork pointer */
1717	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
1718	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
1719					/* left is 0, right is 1, prev is 2 */
1720	int			rval=0;	/* return value (logging flags) */
1721	int			state = 0;/* state bits, accessed thru macros */
1722	xfs_filblks_t		da_new; /* new count del alloc blocks used */
1723	xfs_filblks_t		da_old; /* old count del alloc blocks used */
1724	xfs_filblks_t		temp=0;	/* value for da_new calculations */
1725	xfs_filblks_t		temp2=0;/* value for da_new calculations */
1726	int			tmp_rval;	/* partial logging flags */
1727	struct xfs_mount	*mp;
1728
1729	mp  = bma->tp ? bma->tp->t_mountp : NULL;
	ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);

	ASSERT(bma->idx >= 0);
	ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
	ASSERT(!isnullstartblock(new->br_startblock));
	ASSERT(!bma->cur ||
	       (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));

	XFS_STATS_INC(xs_add_exlist);

#define	LEFT		r[0]
#define	RIGHT		r[1]
#define	PREV		r[2]

	/*
	 * Set up a bunch of variables to make the tests simpler.
	 */
	ep = xfs_iext_get_ext(ifp, bma->idx);
	xfs_bmbt_get_all(ep, &PREV);
	new_endoff = new->br_startoff + new->br_blockcount;
	ASSERT(PREV.br_startoff <= new->br_startoff);
	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);

	da_old = startblockval(PREV.br_startblock);
	da_new = 0;

	/*
	 * Set flags determining what part of the previous delayed allocation
	 * extent is being replaced by a real allocation.
	 */
	if (PREV.br_startoff == new->br_startoff)
		state |= BMAP_LEFT_FILLING;
	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
		state |= BMAP_RIGHT_FILLING;
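	/*
	 * For illustration (hypothetical numbers): the asserts above
	 * guarantee new is contained in PREV.  With PREV covering file
	 * offsets [10, 30), a new allocation at [10, 20) sets only
	 * BMAP_LEFT_FILLING, [20, 30) sets only BMAP_RIGHT_FILLING,
	 * [10, 30) sets both (a complete fill), and [15, 25) sets
	 * neither (the middle-split "case 0" below).
	 */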

	/*
	 * Check and set flags if this segment has a left neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 */
	if (bma->idx > 0) {
		state |= BMAP_LEFT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT);

		if (isnullstartblock(LEFT.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
	    LEFT.br_state == new->br_state &&
	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
		state |= BMAP_LEFT_CONTIG;

	/*
	 * Check and set flags if this segment has a right neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 * Also check for all-three-contiguous being too large.
	 */
	if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
		state |= BMAP_RIGHT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);

		if (isnullstartblock(RIGHT.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
	    new_endoff == RIGHT.br_startoff &&
	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
	    new->br_state == RIGHT.br_state &&
	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING)) !=
		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING) ||
	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
			<= MAXEXTLEN))
		state |= BMAP_RIGHT_CONTIG;
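	/*
	 * Note the final clause above: even when left and right are each
	 * individually contiguous, the three-way merge case (both FILLING
	 * flags plus BMAP_LEFT_CONTIG) is only permitted when the combined
	 * LEFT + new + RIGHT length still fits in MAXEXTLEN.
	 */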

	error = 0;
	/*
	 * Switch out based on the FILLING and CONTIG state bits.
	 */
	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The left and right neighbors are both contiguous with new.
		 */
		bma->idx--;
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
			LEFT.br_blockcount + PREV.br_blockcount +
			RIGHT.br_blockcount);
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
		bma->ip->i_d.di_nextents--;
		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
					RIGHT.br_startblock,
					RIGHT.br_blockcount, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_delete(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_decrement(bma->cur, 0, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
					LEFT.br_startblock,
					LEFT.br_blockcount +
					PREV.br_blockcount +
					RIGHT.br_blockcount, LEFT.br_state);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The left neighbor is contiguous, the right is not.
		 */
		bma->idx--;

		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
			LEFT.br_blockcount + PREV.br_blockcount);
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
					LEFT.br_startblock, LEFT.br_blockcount,
					&i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
					LEFT.br_startblock,
					LEFT.br_blockcount +
					PREV.br_blockcount, LEFT.br_state);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The right neighbor is contiguous, the left is not.
		 */
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_startblock(ep, new->br_startblock);
		xfs_bmbt_set_blockcount(ep,
			PREV.br_blockcount + RIGHT.br_blockcount);
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
					RIGHT.br_startblock,
					RIGHT.br_blockcount, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
					new->br_startblock,
					PREV.br_blockcount +
					RIGHT.br_blockcount, PREV.br_state);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * Neither the left nor right neighbors are contiguous with
		 * the new one.
		 */
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_startblock(ep, new->br_startblock);
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		bma->ip->i_d.di_nextents++;
		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
					new->br_startblock, new->br_blockcount,
					&i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Filling in the first part of a previous delayed allocation.
		 * The left neighbor is contiguous.
		 */
		trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1),
			LEFT.br_blockcount + new->br_blockcount);
		xfs_bmbt_set_startoff(ep,
			PREV.br_startoff + new->br_blockcount);
		trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_);

		temp = PREV.br_blockcount - new->br_blockcount;
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
					LEFT.br_startblock, LEFT.br_blockcount,
					&i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
					LEFT.br_startblock,
					LEFT.br_blockcount +
					new->br_blockcount,
					LEFT.br_state);
			if (error)
				goto done;
		}
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock));
		xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		bma->idx--;
		break;

	case BMAP_LEFT_FILLING:
		/*
		 * Filling in the first part of a previous delayed allocation.
		 * The left neighbor is not contiguous.
		 */
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_startoff(ep, new_endoff);
		temp = PREV.br_blockcount - new->br_blockcount;
		xfs_bmbt_set_blockcount(ep, temp);
		xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
		bma->ip->i_d.di_nextents++;
		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
					new->br_startblock, new->br_blockcount,
					&i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}

		if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
					bma->firstblock, bma->flist,
					&bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
			rval |= tmp_rval;
			if (error)
				goto done;
		}
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock) -
			(bma->cur ? bma->cur->bc_private.b.allocated : 0));
		ep = xfs_iext_get_ext(ifp, bma->idx + 1);
		xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
		trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
		break;

	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in the last part of a previous delayed allocation.
		 * The right neighbor is contiguous with the new allocation.
		 */
		temp = PREV.br_blockcount - new->br_blockcount;
		trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1),
			new->br_startoff, new->br_startblock,
			new->br_blockcount + RIGHT.br_blockcount,
			RIGHT.br_state);
		trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
					RIGHT.br_startblock,
					RIGHT.br_blockcount, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, new->br_startoff,
					new->br_startblock,
					new->br_blockcount +
					RIGHT.br_blockcount,
					RIGHT.br_state);
			if (error)
				goto done;
		}

		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock));
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		bma->idx++;
		break;

	case BMAP_RIGHT_FILLING:
		/*
		 * Filling in the last part of a previous delayed allocation.
		 * The right neighbor is not contiguous.
		 */
		temp = PREV.br_blockcount - new->br_blockcount;
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
		bma->ip->i_d.di_nextents++;
		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
					new->br_startblock, new->br_blockcount,
					&i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}

		if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
				bma->firstblock, bma->flist, &bma->cur, 1,
				&tmp_rval, XFS_DATA_FORK);
			rval |= tmp_rval;
			if (error)
				goto done;
		}
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock) -
			(bma->cur ? bma->cur->bc_private.b.allocated : 0));
		ep = xfs_iext_get_ext(ifp, bma->idx);
		xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		bma->idx++;
		break;

	case 0:
		/*
		 * Filling in the middle part of a previous delayed allocation.
		 * Contiguity is impossible here.
		 * This case is avoided almost all the time.
		 *
		 * We start with a delayed allocation:
		 *
		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
		 *  PREV @ idx
		 *
		 * and we are allocating:
		 *                     +rrrrrrrrrrrrrrrrr+
		 *			      new
		 *
		 * and we set it up for insertion as:
		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
		 *                            new
		 *  PREV @ idx          LEFT              RIGHT
		 *                      inserted at idx + 1
		 */
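		/*
		 * Worked example (hypothetical numbers): with PREV spanning
		 * [0, 100) delalloc and new spanning [30, 60), temp = 30
		 * blocks remain delayed on the left and temp2 = 40 on the
		 * right, giving the three records shown above.
		 */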
		temp = new->br_startoff - PREV.br_startoff;
		temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);	/* truncate PREV */
		LEFT = *new;
		RIGHT.br_state = PREV.br_state;
		RIGHT.br_startblock = nullstartblock(
				(int)xfs_bmap_worst_indlen(bma->ip, temp2));
		RIGHT.br_startoff = new_endoff;
		RIGHT.br_blockcount = temp2;
		/* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
		xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
		bma->ip->i_d.di_nextents++;
		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
					new->br_startblock, new->br_blockcount,
					&i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}

		if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
					bma->firstblock, bma->flist, &bma->cur,
					1, &tmp_rval, XFS_DATA_FORK);
			rval |= tmp_rval;
			if (error)
				goto done;
		}
		temp = xfs_bmap_worst_indlen(bma->ip, temp);
		temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
		diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
			(bma->cur ? bma->cur->bc_private.b.allocated : 0));
		if (diff > 0) {
			error = xfs_mod_fdblocks(bma->ip->i_mount,
						 -((int64_t)diff), false);
			ASSERT(!error);
			if (error)
				goto done;
		}

		ep = xfs_iext_get_ext(ifp, bma->idx);
		xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
		trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2),
			nullstartblock((int)temp2));
		trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_);

		bma->idx++;
		da_new = temp + temp2;
		break;

	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_LEFT_CONTIG:
	case BMAP_RIGHT_CONTIG:
		/*
		 * These cases are all impossible.
		 */
		ASSERT(0);
	}

	/* convert to a btree if necessary */
	if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
		int	tmp_logflags;	/* partial log flag return val */

		ASSERT(bma->cur == NULL);
		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
				bma->firstblock, bma->flist, &bma->cur,
				da_old > 0, &tmp_logflags, XFS_DATA_FORK);
		bma->logflags |= tmp_logflags;
		if (error)
			goto done;
	}

	/* adjust for changes in reserved delayed indirect blocks */
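	/*
	 * Example (hypothetical numbers): if da_old = 8 worst-case indirect
	 * blocks were reserved for the old delalloc extent, and the
	 * remaining delayed pieces plus the cursor's allocated blocks now
	 * need only temp = 5, the other 3 are returned to fdblocks below.
	 */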
	if (da_old || da_new) {
		temp = da_new;
		if (bma->cur)
			temp += bma->cur->bc_private.b.allocated;
		ASSERT(temp <= da_old);
		if (temp < da_old)
			xfs_mod_fdblocks(bma->ip->i_mount,
					(int64_t)(da_old - temp), false);
	}

	/* clear out the allocated field, done with it now in any case. */
	if (bma->cur)
		bma->cur->bc_private.b.allocated = 0;

	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK);
done:
	bma->logflags |= rval;
	return error;
#undef	LEFT
#undef	RIGHT
#undef	PREV
}

/*
 * Convert an unwritten allocation to a real allocation or vice versa.
 */
STATIC int				/* error */
xfs_bmap_add_extent_unwritten_real(
	struct xfs_trans	*tp,
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_extnum_t		*idx,	/* extent number to update/insert */
	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
	int			*logflagsp) /* inode logging flags */
{
	xfs_btree_cur_t		*cur;	/* btree cursor */
	xfs_bmbt_rec_host_t	*ep;	/* extent entry for idx */
	int			error;	/* error return value */
	int			i;	/* temp state */
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
	xfs_exntst_t		newext;	/* new extent state */
	xfs_exntst_t		oldext;	/* old extent state */
	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
					/* left is 0, right is 1, prev is 2 */
	int			rval = 0;	/* return value (logging flags) */
	int			state = 0;	/* state bits, accessed through macros */
	struct xfs_mount	*mp = tp->t_mountp;

	*logflagsp = 0;

	cur = *curp;
	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);

	ASSERT(*idx >= 0);
	ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
	ASSERT(!isnullstartblock(new->br_startblock));

	XFS_STATS_INC(xs_add_exlist);

#define	LEFT		r[0]
#define	RIGHT		r[1]
#define	PREV		r[2]

	/*
	 * Set up a bunch of variables to make the tests simpler.
	 */
	error = 0;
	ep = xfs_iext_get_ext(ifp, *idx);
	xfs_bmbt_get_all(ep, &PREV);
	newext = new->br_state;
	oldext = (newext == XFS_EXT_UNWRITTEN) ?
		XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
	ASSERT(PREV.br_state == oldext);
	new_endoff = new->br_startoff + new->br_blockcount;
	ASSERT(PREV.br_startoff <= new->br_startoff);
	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);

	/*
	 * Set flags determining what part of the previous oldext allocation
	 * extent is being replaced by a newext allocation.
	 */
	if (PREV.br_startoff == new->br_startoff)
		state |= BMAP_LEFT_FILLING;
	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
		state |= BMAP_RIGHT_FILLING;

	/*
	 * Check and set flags if this segment has a left neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 */
	if (*idx > 0) {
		state |= BMAP_LEFT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);

		if (isnullstartblock(LEFT.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
	    LEFT.br_state == newext &&
	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
		state |= BMAP_LEFT_CONTIG;

	/*
	 * Check and set flags if this segment has a right neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 * Also check for all-three-contiguous being too large.
	 */
	if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
		state |= BMAP_RIGHT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
		if (isnullstartblock(RIGHT.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
	    new_endoff == RIGHT.br_startoff &&
	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
	    newext == RIGHT.br_state &&
	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING)) !=
		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING) ||
	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
			<= MAXEXTLEN))
		state |= BMAP_RIGHT_CONTIG;

	/*
	 * Switch out based on the FILLING and CONTIG state bits.
	 */
	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Setting all of a previous oldext extent to newext.
		 * The left and right neighbors are both contiguous with new.
		 */
		--*idx;

		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
			LEFT.br_blockcount + PREV.br_blockcount +
			RIGHT.br_blockcount);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

		xfs_iext_remove(ip, *idx + 1, 2, state);
		ip->i_d.di_nextents -= 2;
		if (cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
					RIGHT.br_startblock,
					RIGHT.br_blockcount, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_btree_delete(cur, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_btree_decrement(cur, 0, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_btree_delete(cur, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_btree_decrement(cur, 0, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
				LEFT.br_startblock,
				LEFT.br_blockcount + PREV.br_blockcount +
				RIGHT.br_blockcount, LEFT.br_state)))
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Setting all of a previous oldext extent to newext.
		 * The left neighbor is contiguous, the right is not.
		 */
		--*idx;

		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
			LEFT.br_blockcount + PREV.br_blockcount);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

		xfs_iext_remove(ip, *idx + 1, 1, state);
		ip->i_d.di_nextents--;
		if (cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
					PREV.br_startblock, PREV.br_blockcount,
					&i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_btree_delete(cur, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_btree_decrement(cur, 0, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
				LEFT.br_startblock,
				LEFT.br_blockcount + PREV.br_blockcount,
				LEFT.br_state)))
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Setting all of a previous oldext extent to newext.
		 * The right neighbor is contiguous, the left is not.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep,
			PREV.br_blockcount + RIGHT.br_blockcount);
		xfs_bmbt_set_state(ep, newext);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		xfs_iext_remove(ip, *idx + 1, 1, state);
		ip->i_d.di_nextents--;
		if (cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
					RIGHT.br_startblock,
					RIGHT.br_blockcount, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_btree_delete(cur, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_btree_decrement(cur, 0, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_bmbt_update(cur, new->br_startoff,
				new->br_startblock,
				new->br_blockcount + RIGHT.br_blockcount,
				newext)))
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
		/*
		 * Setting all of a previous oldext extent to newext.
		 * Neither the left nor right neighbors are contiguous with
		 * the new one.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_state(ep, newext);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

		if (cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
					new->br_startblock, new->br_blockcount,
					&i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_bmbt_update(cur, new->br_startoff,
				new->br_startblock, new->br_blockcount,
				newext)))
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Setting the first part of a previous oldext extent to newext.
		 * The left neighbor is contiguous.
		 */
		trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
			LEFT.br_blockcount + new->br_blockcount);
		xfs_bmbt_set_startoff(ep,
			PREV.br_startoff + new->br_blockcount);
		trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);

		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_startblock(ep,
			new->br_startblock + new->br_blockcount);
		xfs_bmbt_set_blockcount(ep,
			PREV.br_blockcount - new->br_blockcount);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

		--*idx;

		if (cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
					PREV.br_startblock, PREV.br_blockcount,
					&i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_bmbt_update(cur,
				PREV.br_startoff + new->br_blockcount,
				PREV.br_startblock + new->br_blockcount,
				PREV.br_blockcount - new->br_blockcount,
				oldext)))
				goto done;
			if ((error = xfs_btree_decrement(cur, 0, &i)))
				goto done;
			error = xfs_bmbt_update(cur, LEFT.br_startoff,
				LEFT.br_startblock,
				LEFT.br_blockcount + new->br_blockcount,
				LEFT.br_state);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING:
		/*
		 * Setting the first part of a previous oldext extent to newext.
		 * The left neighbor is not contiguous.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
		xfs_bmbt_set_startoff(ep, new_endoff);
		xfs_bmbt_set_blockcount(ep,
			PREV.br_blockcount - new->br_blockcount);
		xfs_bmbt_set_startblock(ep,
			new->br_startblock + new->br_blockcount);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

		xfs_iext_insert(ip, *idx, 1, new, state);
		ip->i_d.di_nextents++;
		if (cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
					PREV.br_startblock, PREV.br_blockcount,
					&i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_bmbt_update(cur,
				PREV.br_startoff + new->br_blockcount,
				PREV.br_startblock + new->br_blockcount,
				PREV.br_blockcount - new->br_blockcount,
				oldext)))
				goto done;
			cur->bc_rec.b = *new;
			if ((error = xfs_btree_insert(cur, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}
		break;

	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Setting the last part of a previous oldext extent to newext.
		 * The right neighbor is contiguous with the new allocation.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep,
			PREV.br_blockcount - new->br_blockcount);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

		++*idx;

		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
			new->br_startoff, new->br_startblock,
			new->br_blockcount + RIGHT.br_blockcount, newext);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

		if (cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
					PREV.br_startblock,
					PREV.br_blockcount, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
				PREV.br_startblock,
				PREV.br_blockcount - new->br_blockcount,
				oldext)))
				goto done;
			if ((error = xfs_btree_increment(cur, 0, &i)))
				goto done;
			if ((error = xfs_bmbt_update(cur, new->br_startoff,
				new->br_startblock,
				new->br_blockcount + RIGHT.br_blockcount,
				newext)))
				goto done;
		}
		break;

	case BMAP_RIGHT_FILLING:
		/*
		 * Setting the last part of a previous oldext extent to newext.
		 * The right neighbor is not contiguous.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep,
			PREV.br_blockcount - new->br_blockcount);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

		++*idx;
		xfs_iext_insert(ip, *idx, 1, new, state);

		ip->i_d.di_nextents++;
		if (cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
					PREV.br_startblock, PREV.br_blockcount,
					&i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
				PREV.br_startblock,
				PREV.br_blockcount - new->br_blockcount,
				oldext)))
				goto done;
			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
					new->br_startblock, new->br_blockcount,
					&i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			cur->bc_rec.b.br_state = new->br_state;
			if ((error = xfs_btree_insert(cur, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}
		break;

	case 0:
		/*
		 * Setting the middle part of a previous oldext extent to
		 * newext.  Contiguity is impossible here.
		 * One extent becomes three extents.
		 */
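		/*
		 * For illustration: converting [30, 60) of a written extent
		 * [0, 100) to unwritten leaves [0, 30) and [60, 100) in the
		 * old state with the new unwritten record in between.
		 */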
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep,
			new->br_startoff - PREV.br_startoff);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

		r[0] = *new;
		r[1].br_startoff = new_endoff;
		r[1].br_blockcount =
			PREV.br_startoff + PREV.br_blockcount - new_endoff;
		r[1].br_startblock = new->br_startblock + new->br_blockcount;
		r[1].br_state = oldext;

		++*idx;
		xfs_iext_insert(ip, *idx, 2, &r[0], state);

		ip->i_d.di_nextents += 2;
		if (cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
					PREV.br_startblock, PREV.br_blockcount,
					&i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			/* new right extent - oldext */
			if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
				r[1].br_startblock, r[1].br_blockcount,
				r[1].br_state)))
				goto done;
			/* new left extent - oldext */
			cur->bc_rec.b = PREV;
			cur->bc_rec.b.br_blockcount =
				new->br_startoff - PREV.br_startoff;
			if ((error = xfs_btree_insert(cur, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			/*
			 * Reset the cursor to the position of the new extent
			 * we are about to insert as we can't trust it after
			 * the previous insert.
			 */
			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
					new->br_startblock, new->br_blockcount,
					&i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			/* new middle extent - newext */
			cur->bc_rec.b.br_state = new->br_state;
			if ((error = xfs_btree_insert(cur, &i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_LEFT_CONTIG:
	case BMAP_RIGHT_CONTIG:
		/*
		 * These cases are all impossible.
		 */
		ASSERT(0);
	}

	/* convert to a btree if necessary */
	if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
		int	tmp_logflags;	/* partial log flag return val */

		ASSERT(cur == NULL);
		error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur,
				0, &tmp_logflags, XFS_DATA_FORK);
		*logflagsp |= tmp_logflags;
		if (error)
			goto done;
	}

	/* clear out the allocated field, done with it now in any case. */
	if (cur) {
		cur->bc_private.b.allocated = 0;
		*curp = cur;
	}

	xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
done:
	*logflagsp |= rval;
	return error;
#undef	LEFT
#undef	RIGHT
#undef	PREV
}

/*
 * Convert a hole to a delayed allocation.
 */
STATIC void
xfs_bmap_add_extent_hole_delay(
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_extnum_t		*idx,	/* extent number to update/insert */
	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
{
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
	xfs_filblks_t		newlen = 0;	/* new indirect size */
	xfs_filblks_t		oldlen = 0;	/* old indirect size */
	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
	int			state;  /* state bits, accessed through macros */
	xfs_filblks_t		temp = 0;	/* temp for indirect calculations */

	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	state = 0;
	ASSERT(isnullstartblock(new->br_startblock));

	/*
	 * Check and set flags if this segment has a left neighbor
	 */
	if (*idx > 0) {
		state |= BMAP_LEFT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);

		if (isnullstartblock(left.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	/*
	 * Check and set flags if the current (right) segment exists.
	 * If it doesn't exist, we're converting the hole at end-of-file.
	 */
	if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
		state |= BMAP_RIGHT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);

		if (isnullstartblock(right.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	/*
	 * Set contiguity flags on the left and right neighbors.
	 * Don't let extents get too large, even if the pieces are contiguous.
	 */
	if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
	    left.br_startoff + left.br_blockcount == new->br_startoff &&
	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
		state |= BMAP_LEFT_CONTIG;

	if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
	    new->br_startoff + new->br_blockcount == right.br_startoff &&
	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
	    (!(state & BMAP_LEFT_CONTIG) ||
	     (left.br_blockcount + new->br_blockcount +
	      right.br_blockcount <= MAXEXTLEN)))
		state |= BMAP_RIGHT_CONTIG;

	/*
	 * Switch out based on the contiguity flags.
	 */
	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with delayed allocations
		 * on the left and on the right.
		 * Merge all three into a single extent record.
		 */
		--*idx;
		temp = left.br_blockcount + new->br_blockcount +
			right.br_blockcount;

		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
		oldlen = startblockval(left.br_startblock) +
			startblockval(new->br_startblock) +
			startblockval(right.br_startblock);
		newlen = xfs_bmap_worst_indlen(ip, temp);
		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
			nullstartblock((int)newlen));
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

		xfs_iext_remove(ip, *idx + 1, 1, state);
		break;

	case BMAP_LEFT_CONTIG:
		/*
		 * New allocation is contiguous with a delayed allocation
		 * on the left.
		 * Merge the new allocation with the left neighbor.
		 */
		--*idx;
		temp = left.br_blockcount + new->br_blockcount;

		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
		oldlen = startblockval(left.br_startblock) +
			startblockval(new->br_startblock);
		newlen = xfs_bmap_worst_indlen(ip, temp);
		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
			nullstartblock((int)newlen));
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		break;

	case BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with a delayed allocation
		 * on the right.
		 * Merge the new allocation with the right neighbor.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		temp = new->br_blockcount + right.br_blockcount;
		oldlen = startblockval(new->br_startblock) +
			startblockval(right.br_startblock);
		newlen = xfs_bmap_worst_indlen(ip, temp);
		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
			new->br_startoff,
			nullstartblock((int)newlen), temp, right.br_state);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		break;

	case 0:
		/*
		 * New allocation is not contiguous with another
		 * delayed allocation.
		 * Insert a new entry.
		 */
		oldlen = newlen = 0;
		xfs_iext_insert(ip, *idx, 1, new, state);
		break;
	}
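	/*
	 * In the merge cases above the worst-case indirect reservation for
	 * one large extent (newlen) never exceeds the sum of the pieces'
	 * reservations (oldlen); e.g. merging two 8-block delalloc extents
	 * needs at most the indirect blocks of a single 16-block extent,
	 * so the difference can be returned to the free-block count below.
	 */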
	if (oldlen != newlen) {
		ASSERT(oldlen > newlen);
		xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
				 false);
		/*
		 * Nothing to do for disk quota accounting here.
		 */
	}
}

/*
 * Convert a hole to a real allocation.
 */
STATIC int				/* error */
xfs_bmap_add_extent_hole_real(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_bmbt_irec	*new = &bma->got;
	int			error;	/* error return value */
	int			i;	/* temp state */
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
	int			rval = 0;	/* return value (logging flags) */
	int			state;	/* state bits, accessed through macros */
	struct xfs_mount	*mp;

	mp = bma->ip->i_mount;	/* always valid; the corruption checks below must never see a NULL mount */
	ifp = XFS_IFORK_PTR(bma->ip, whichfork);

	ASSERT(bma->idx >= 0);
	ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
	ASSERT(!isnullstartblock(new->br_startblock));
	ASSERT(!bma->cur ||
	       !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));

	XFS_STATS_INC(xs_add_exlist);

	state = 0;
	if (whichfork == XFS_ATTR_FORK)
		state |= BMAP_ATTRFORK;

	/*
	 * Check and set flags if this segment has a left neighbor.
	 */
	if (bma->idx > 0) {
		state |= BMAP_LEFT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left);
		if (isnullstartblock(left.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	/*
	 * Check and set flags if this segment has a current value.
	 * Not true if we're inserting into the "hole" at eof.
	 */
	if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
		state |= BMAP_RIGHT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
		if (isnullstartblock(right.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	/*
	 * We're inserting a real allocation between "left" and "right".
	 * Set the contiguity flags.  Don't let extents get too large.
	 */
	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
	    left.br_startoff + left.br_blockcount == new->br_startoff &&
	    left.br_startblock + left.br_blockcount == new->br_startblock &&
	    left.br_state == new->br_state &&
	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
		state |= BMAP_LEFT_CONTIG;

	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
	    new->br_startoff + new->br_blockcount == right.br_startoff &&
	    new->br_startblock + new->br_blockcount == right.br_startblock &&
	    new->br_state == right.br_state &&
	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
	    (!(state & BMAP_LEFT_CONTIG) ||
	     left.br_blockcount + new->br_blockcount +
	     right.br_blockcount <= MAXEXTLEN))
		state |= BMAP_RIGHT_CONTIG;

	error = 0;
	/*
	 * Select which case we're in here, and implement it.
	 */
	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with real allocations on the
		 * left and on the right.
		 * Merge all three into a single extent record.
		 */
		--bma->idx;
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
			left.br_blockcount + new->br_blockcount +
			right.br_blockcount);
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);

		XFS_IFORK_NEXT_SET(bma->ip, whichfork,
			XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1);
		if (bma->cur == NULL) {
			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
		} else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff,
					right.br_startblock, right.br_blockcount,
					&i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_delete(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_decrement(bma->cur, 0, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, left.br_startoff,
					left.br_startblock,
					left.br_blockcount +
						new->br_blockcount +
						right.br_blockcount,
					left.br_state);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_CONTIG:
		/*
		 * New allocation is contiguous with a real allocation
		 * on the left.
		 * Merge the new allocation with the left neighbor.
		 */
		--bma->idx;
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
			left.br_blockcount + new->br_blockcount);
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		if (bma->cur == NULL) {
			rval = xfs_ilog_fext(whichfork);
		} else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff,
					left.br_startblock, left.br_blockcount,
					&i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, left.br_startoff,
					left.br_startblock,
					left.br_blockcount +
						new->br_blockcount,
					left.br_state);
			if (error)
				goto done;
		}
		break;

	case BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with a real allocation
		 * on the right.
		 * Merge the new allocation with the right neighbor.
		 */
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx),
			new->br_startoff, new->br_startblock,
			new->br_blockcount + right.br_blockcount,
			right.br_state);
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		if (bma->cur == NULL) {
			rval = xfs_ilog_fext(whichfork);
		} else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur,
					right.br_startoff,
					right.br_startblock,
					right.br_blockcount, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, new->br_startoff,
					new->br_startblock,
					new->br_blockcount +
						right.br_blockcount,
					right.br_state);
			if (error)
				goto done;
		}
		break;

	case 0:
		/*
		 * New allocation is not contiguous with another
		 * real allocation.
		 * Insert a new entry.
		 */
		xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
		XFS_IFORK_NEXT_SET(bma->ip, whichfork,
			XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1);
		if (bma->cur == NULL) {
			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
		} else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur,
					new->br_startoff,
					new->br_startblock,
					new->br_blockcount, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			bma->cur->bc_rec.b.br_state = new->br_state;
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}
		break;
	}

	/* convert to a btree if necessary */
	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
		int	tmp_logflags;	/* partial log flag return val */

		ASSERT(bma->cur == NULL);
		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
				bma->firstblock, bma->flist, &bma->cur,
				0, &tmp_logflags, whichfork);
		bma->logflags |= tmp_logflags;
		if (error)
			goto done;
	}

	/* clear out the allocated field, done with it now in any case. */
	if (bma->cur)
		bma->cur->bc_private.b.allocated = 0;

	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
	bma->logflags |= rval;
	return error;
}

/*
 * Functions used in the extent read, allocate and remove paths
 */

/*
 * Adjust the size of the new extent based on di_extsize and rt extsize.
 */
int
xfs_bmap_extsize_align(
	xfs_mount_t	*mp,
	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
	xfs_bmbt_irec_t	*prevp,		/* previous extent pointer */
	xfs_extlen_t	extsz,		/* align to this extent size */
	int		rt,		/* is this a realtime inode? */
	int		eof,		/* is extent at end-of-file? */
	int		delay,		/* creating delalloc extent? */
	int		convert,	/* overwriting unwritten extent? */
	xfs_fileoff_t	*offp,		/* in/out: aligned offset */
	xfs_extlen_t	*lenp)		/* in/out: aligned length */
{
	xfs_fileoff_t	orig_off;	/* original offset */
	xfs_extlen_t	orig_alen;	/* original length */
	xfs_fileoff_t	orig_end;	/* original off+len */
	xfs_fileoff_t	nexto;		/* next file offset */
	xfs_fileoff_t	prevo;		/* previous file offset */
	xfs_fileoff_t	align_off;	/* temp for offset */
	xfs_extlen_t	align_alen;	/* temp for length */
	xfs_extlen_t	temp;		/* temp for calculations */

	if (convert)
		return 0;

	orig_off = align_off = *offp;
	orig_alen = align_alen = *lenp;
	orig_end = orig_off + orig_alen;

	/*
	 * If this request overlaps an existing extent, then don't
	 * attempt to perform any additional alignment.
	 */
	if (!delay && !eof &&
	    (orig_off >= gotp->br_startoff) &&
	    (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
		return 0;
	}

	/*
	 * If the file offset is unaligned vs. the extent size
	 * we need to align it.  This will be possible unless
	 * the file was previously written with a kernel that didn't
	 * perform this alignment, or if a truncate shot us in the
	 * foot.
	 */
	temp = do_mod(orig_off, extsz);
	if (temp) {
		align_alen += temp;
		align_off -= temp;
	}

	/* Same adjustment for the end of the requested area. */
	temp = (align_alen % extsz);
	if (temp)
		align_alen += extsz - temp;
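	/*
	 * Worked example (hypothetical numbers): with extsz = 16, a request
	 * at offset 21 for 10 blocks first grows to offset 16, length 15,
	 * and the end is then rounded up by one block, yielding an aligned
	 * allocation covering [16, 32).
	 */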

	/*
	 * For large extent hint sizes, the aligned extent might be larger than
	 * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
	 * the length back under MAXEXTLEN. The outer allocation loops handle
	 * short allocation just fine, so it is safe to do this. We only want to
	 * do it when we are forced to, though, because it means more allocation
	 * operations are required.
	 */
	while (align_alen > MAXEXTLEN)
		align_alen -= extsz;
	ASSERT(align_alen <= MAXEXTLEN);

	/*
	 * If the previous block overlaps with this proposed allocation
	 * then move the start forward without adjusting the length.
	 */
	if (prevp->br_startoff != NULLFILEOFF) {
		if (prevp->br_startblock == HOLESTARTBLOCK)
			prevo = prevp->br_startoff;
		else
			prevo = prevp->br_startoff + prevp->br_blockcount;
	} else
		prevo = 0;
	if (align_off != orig_off && align_off < prevo)
		align_off = prevo;
	/*
	 * If the next block overlaps with this proposed allocation
	 * then move the start back without adjusting the length,
	 * but not before offset 0.
	 * This may of course make the start overlap the previous block,
	 * and if we hit the offset 0 limit then the next block
	 * can still overlap too.
	 */
	if (!eof && gotp->br_startoff != NULLFILEOFF) {
		if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
		    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
			nexto = gotp->br_startoff + gotp->br_blockcount;
		else
			nexto = gotp->br_startoff;
	} else
		nexto = NULLFILEOFF;
	if (!eof &&
	    align_off + align_alen != orig_end &&
	    align_off + align_alen > nexto)
		align_off = nexto > align_alen ? nexto - align_alen : 0;
	/*
	 * If we're now overlapping the next or previous extent that
	 * means we can't fit an extsz piece in this hole.  Just move
	 * the start forward to the first valid spot and set
	 * the length so we hit the end.
	 */
	if (align_off != orig_off && align_off < prevo)
		align_off = prevo;
	if (align_off + align_alen != orig_end &&
	    align_off + align_alen > nexto &&
	    nexto != NULLFILEOFF) {
		ASSERT(nexto > prevo);
		align_alen = nexto - align_off;
	}
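	/*
	 * E.g. when the hole between prevo and nexto is smaller than the
	 * aligned request, the fixups above pin the start at prevo and trim
	 * the length to nexto - align_off, trading alignment for fitting
	 * the allocation into the hole.
	 */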

	/*
	 * If realtime, and the result isn't a multiple of the realtime
	 * extent size we need to remove blocks until it is.
	 */
	if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
		/*
		 * We're not covering the original request, or
		 * we won't be able to once we fix the length.
		 */
		if (orig_off < align_off ||
		    orig_end > align_off + align_alen ||
		    align_alen - temp < orig_alen)
			return -EINVAL;
		/*
		 * Try to fix it by moving the start up.
		 */
		if (align_off + temp <= orig_off) {
			align_alen -= temp;
			align_off += temp;
		}
		/*
		 * Try to fix it by moving the end in.
		 */
		else if (align_off + align_alen - temp >= orig_end)
			align_alen -= temp;
		/*
		 * Set the start to the minimum then trim the length.
		 */
		else {
			align_alen -= orig_off - align_off;
			align_off = orig_off;
			align_alen -= align_alen % mp->m_sb.sb_rextsize;
		}
		/*
		 * Result doesn't cover the request, fail it.
		 */
		if (orig_off < align_off || orig_end > align_off + align_alen)
			return -EINVAL;
	} else {
		ASSERT(orig_off >= align_off);
		/* see MAXEXTLEN handling above */
		ASSERT(orig_end <= align_off + align_alen ||
		       align_alen + extsz > MAXEXTLEN);
	}

#ifdef DEBUG
	if (!eof && gotp->br_startoff != NULLFILEOFF)
		ASSERT(align_off + align_alen <= gotp->br_startoff);
	if (prevp->br_startoff != NULLFILEOFF)
		ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
#endif

	*lenp = align_alen;
	*offp = align_off;
	return 0;
}

#define XFS_ALLOC_GAP_UNITS	4

void
xfs_bmap_adjacent(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
	xfs_mount_t	*mp;		/* mount point structure */
	int		nullfb;		/* true if ap->firstblock isn't set */
	int		rt;		/* true if inode is realtime */

#define	ISVALID(x,y)	\
	(rt ? \
		(x) < mp->m_sb.sb_rblocks : \
		XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3368
3369	mp = ap->ip->i_mount;
3370	nullfb = *ap->firstblock == NULLFSBLOCK;
3371	rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
3372	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3373	/*
3374	 * If allocating at eof, and there's a previous real block,
3375	 * try to use its last block as our starting point.
3376	 */
3377	if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3378	    !isnullstartblock(ap->prev.br_startblock) &&
3379	    ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3380		    ap->prev.br_startblock)) {
3381		ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3382		/*
3383		 * Adjust for the gap between prevp and us.
3384		 */
3385		adjust = ap->offset -
3386			(ap->prev.br_startoff + ap->prev.br_blockcount);
3387		if (adjust &&
3388		    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3389			ap->blkno += adjust;
3390	}
3391	/*
3392	 * If not at eof, then compare the two neighbor blocks.
3393	 * Figure out whether either one gives us a good starting point,
3394	 * and pick the better one.
3395	 */
3396	else if (!ap->eof) {
3397		xfs_fsblock_t	gotbno;		/* right side block number */
3398		xfs_fsblock_t	gotdiff=0;	/* right side difference */
3399		xfs_fsblock_t	prevbno;	/* left side block number */
3400		xfs_fsblock_t	prevdiff=0;	/* left side difference */
3401
3402		/*
3403		 * If there's a previous (left) block, select a requested
3404		 * start block based on it.
3405		 */
3406		if (ap->prev.br_startoff != NULLFILEOFF &&
3407		    !isnullstartblock(ap->prev.br_startblock) &&
3408		    (prevbno = ap->prev.br_startblock +
3409			       ap->prev.br_blockcount) &&
3410		    ISVALID(prevbno, ap->prev.br_startblock)) {
3411			/*
3412			 * Calculate gap to end of previous block.
3413			 */
3414			adjust = prevdiff = ap->offset -
3415				(ap->prev.br_startoff +
3416				 ap->prev.br_blockcount);
3417			/*
3418			 * Figure the startblock based on the previous block's
3419			 * end and the gap size.
3420			 * Heuristic!
3421			 * If the gap is large relative to the piece we're
3422			 * allocating, or using it gives us an invalid block
3423			 * number, then just use the end of the previous block.
3424			 */
3425			if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3426			    ISVALID(prevbno + prevdiff,
3427				    ap->prev.br_startblock))
3428				prevbno += adjust;
3429			else
3430				prevdiff += adjust;
3431			/*
3432			 * If the firstblock forbids it, can't use it,
3433			 * must use default.
3434			 */
3435			if (!rt && !nullfb &&
3436			    XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3437				prevbno = NULLFSBLOCK;
3438		}
3439		/*
3440		 * No previous block or can't follow it, just default.
3441		 */
3442		else
3443			prevbno = NULLFSBLOCK;
3444		/*
3445		 * If there's a following (right) block, select a requested
3446		 * start block based on it.
3447		 */
3448		if (!isnullstartblock(ap->got.br_startblock)) {
3449			/*
3450			 * Calculate gap to start of next block.
3451			 */
3452			adjust = gotdiff = ap->got.br_startoff - ap->offset;
3453			/*
3454			 * Figure the startblock based on the next block's
3455			 * start and the gap size.
3456			 */
3457			gotbno = ap->got.br_startblock;
3458			/*
3459			 * Heuristic!
3460			 * If the gap is large relative to the piece we're
3461			 * allocating, or using it gives us an invalid block
3462			 * number, then just use the start of the next block
3463			 * offset by our length.
3464			 */
3465			if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3466			    ISVALID(gotbno - gotdiff, gotbno))
3467				gotbno -= adjust;
3468			else if (ISVALID(gotbno - ap->length, gotbno)) {
3469				gotbno -= ap->length;
3470				gotdiff += adjust - ap->length;
3471			} else
3472				gotdiff += adjust;
3473			/*
3474			 * If the firstblock forbids it, can't use it,
3475			 * must use default.
3476			 */
3477			if (!rt && !nullfb &&
3478			    XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3479				gotbno = NULLFSBLOCK;
3480		}
3481		/*
3482		 * No next block, just default.
3483		 */
3484		else
3485			gotbno = NULLFSBLOCK;
3486		/*
3487		 * If both valid, pick the better one, else the only good
3488		 * one, else ap->blkno is already set (to 0 or the inode block).
3489		 */
3490		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3491			ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3492		else if (prevbno != NULLFSBLOCK)
3493			ap->blkno = prevbno;
3494		else if (gotbno != NULLFSBLOCK)
3495			ap->blkno = gotbno;
3496	}
3497#undef ISVALID
3498}
3499
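/*
 * Look up the longest contiguous free extent in AG "ag" and raise *blen
 * to it if it is larger.  Uses a trylock to read in the AGF header if it
 * isn't cached yet; if the lock can't be taken, *notinit is set so the
 * caller knows the answer may be an underestimate.
 */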
static int
xfs_bmap_longest_free_extent(
	struct xfs_trans	*tp,
	xfs_agnumber_t		ag,
	xfs_extlen_t		*blen,
	int			*notinit)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_perag	*pag;
	xfs_extlen_t		longest;
	int			error = 0;

	pag = xfs_perag_get(mp, ag);
	if (!pag->pagf_init) {
		error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
		if (error)
			goto out;

		if (!pag->pagf_init) {
			*notinit = 1;
			goto out;
		}
	}

	longest = xfs_alloc_longest_free_extent(mp, pag);
	if (*blen < longest)
		*blen = longest;

out:
	xfs_perag_put(pag);
	return error;
}

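/*
 * Pick args->minlen for the allocation, based on the longest free extent
 * (*blen) seen while scanning the AGs and on whether any AG headers could
 * not be read in (notinit).
 */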
static void
xfs_bmap_select_minlen(
	struct xfs_bmalloca	*ap,
	struct xfs_alloc_arg	*args,
	xfs_extlen_t		*blen,
	int			notinit)
{
	if (notinit || *blen < ap->minlen) {
		/*
		 * Since we used a trylock above, the scan may have skipped
		 * an AG, so there may still be space for this request.
		 */
		args->minlen = ap->minlen;
	} else if (*blen < args->maxlen) {
		/*
		 * If the best seen length is less than the request length,
		 * use the best as the minimum.
		 */
		args->minlen = *blen;
	} else {
		/*
		 * Otherwise we've seen an extent as big as maxlen, use that
		 * as the minimum.
		 */
		args->minlen = args->maxlen;
	}
}

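/*
 * No previous allocation in this transaction (firstblock is unset), so
 * we are free to pick any AG.  Scan the AGs, starting at the one holding
 * ap->blkno, for the longest free extent and size args->minlen from it.
 */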
STATIC int
xfs_bmap_btalloc_nullfb(
	struct xfs_bmalloca	*ap,
	struct xfs_alloc_arg	*args,
	xfs_extlen_t		*blen)
{
	struct xfs_mount	*mp = ap->ip->i_mount;
	xfs_agnumber_t		ag, startag;
	int			notinit = 0;
	int			error;

	args->type = XFS_ALLOCTYPE_START_BNO;
	args->total = ap->total;

	startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
	if (startag == NULLAGNUMBER)
		startag = ag = 0;

	while (*blen < args->maxlen) {
		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
						     &notinit);
		if (error)
			return error;

		if (++ag == mp->m_sb.sb_agcount)
			ag = 0;
		if (ag == startag)
			break;
	}

	xfs_bmap_select_minlen(ap, args, blen, notinit);
	return 0;
}

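/*
 * Like xfs_bmap_btalloc_nullfb, but for filestream inodes: stay in the
 * AG the filestream allocator picked, and only move to a new AG (via
 * xfs_filestream_new_ag) if that AG can't satisfy the request.
 */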
STATIC int
xfs_bmap_btalloc_filestreams(
	struct xfs_bmalloca	*ap,
	struct xfs_alloc_arg	*args,
	xfs_extlen_t		*blen)
{
	struct xfs_mount	*mp = ap->ip->i_mount;
	xfs_agnumber_t		ag;
	int			notinit = 0;
	int			error;

	args->type = XFS_ALLOCTYPE_NEAR_BNO;
	args->total = ap->total;

	ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
	if (ag == NULLAGNUMBER)
		ag = 0;

	error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
	if (error)
		return error;

	if (*blen < args->maxlen) {
		error = xfs_filestream_new_ag(ap, &ag);
		if (error)
			return error;

		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
						     &notinit);
		if (error)
			return error;
	}

	xfs_bmap_select_minlen(ap, args, blen, notinit);

	/*
	 * Set the failure fallback case to look in the selected AG as the
	 * stream may have moved.
	 */
	ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
	return 0;
}

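/*
 * Allocate a block-device (non-realtime) extent for the file.  Applies
 * extent size and stripe alignment hints, picks a target block via
 * xfs_bmap_adjacent(), then calls xfs_alloc_vextent(), retrying with
 * progressively weaker constraints (no exact bno, no alignment, smaller
 * minlen, any AG) until the allocation succeeds or truly fails.
 */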
STATIC int
xfs_bmap_btalloc(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	xfs_mount_t	*mp;		/* mount point structure */
	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
	xfs_extlen_t	align;		/* minimum allocation alignment */
	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
	xfs_agnumber_t	ag;
	xfs_alloc_arg_t	args;
	xfs_extlen_t	blen;
	xfs_extlen_t	nextminlen = 0;
	int		nullfb;		/* true if ap->firstblock isn't set */
	int		isaligned;
	int		tryagain;
	int		error;
	int		stripe_align;

	ASSERT(ap->length);

	mp = ap->ip->i_mount;

	/* stripe alignment for allocation is determined by mount parameters */
	stripe_align = 0;
	if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
		stripe_align = mp->m_swidth;
	else if (mp->m_dalign)
		stripe_align = mp->m_dalign;

	align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
	if (unlikely(align)) {
		error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
						align, 0, ap->eof, 0, ap->conv,
						&ap->offset, &ap->length);
		ASSERT(!error);
		ASSERT(ap->length);
	}

	nullfb = *ap->firstblock == NULLFSBLOCK;
	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
	if (nullfb) {
		if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
			ag = xfs_filestream_lookup_ag(ap->ip);
			ag = (ag != NULLAGNUMBER) ? ag : 0;
			ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
		} else {
			ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
		}
	} else
		ap->blkno = *ap->firstblock;

	xfs_bmap_adjacent(ap);

	/*
	 * If allowed, use ap->blkno; otherwise we must use firstblock
	 * since it's in the right allocation group.
	 */
	if (!nullfb && XFS_FSB_TO_AGNO(mp, ap->blkno) != fb_agno)
		ap->blkno = *ap->firstblock;
	/*
	 * Normal allocation, done through xfs_alloc_vextent.
	 */
	tryagain = isaligned = 0;
	memset(&args, 0, sizeof(args));
	args.tp = ap->tp;
	args.mp = mp;
	args.fsbno = ap->blkno;

	/* Trim the allocation back to the maximum an AG can fit. */
	args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp));
	args.firstblock = *ap->firstblock;
	blen = 0;
	if (nullfb) {
		/*
		 * Search for an allocation group with a single extent large
		 * enough for the request.  If one isn't found, then adjust
		 * the minimum allocation size to the largest space found.
		 */
		if (ap->userdata && xfs_inode_is_filestream(ap->ip))
			error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
		else
			error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
		if (error)
			return error;
	} else if (ap->flist->xbf_low) {
		if (xfs_inode_is_filestream(ap->ip))
			args.type = XFS_ALLOCTYPE_FIRST_AG;
		else
			args.type = XFS_ALLOCTYPE_START_BNO;
		args.total = args.minlen = ap->minlen;
	} else {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.total = ap->total;
		args.minlen = ap->minlen;
	}
	/* apply extent size hints if obtained earlier */
	if (unlikely(align)) {
		args.prod = align;
		if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
			args.mod = (xfs_extlen_t)(args.prod - args.mod);
	} else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) {
		args.prod = 1;
		args.mod = 0;
	} else {
		args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog;
		if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
			args.mod = (xfs_extlen_t)(args.prod - args.mod);
	}
	/*
	 * If we are not low on available data blocks, and the underlying
	 * logical volume manager is a stripe, and the file offset is zero,
	 * then try to allocate data blocks on a stripe unit boundary.
	 * NOTE: ap->aeof is only set if the allocation length is >= the
	 * stripe unit and the allocation offset is at the end of file.
	 */
	if (!ap->flist->xbf_low && ap->aeof) {
		if (!ap->offset) {
			args.alignment = stripe_align;
			atype = args.type;
			isaligned = 1;
			/*
			 * Adjust for alignment
			 */
			if (blen > args.alignment && blen <= args.maxlen)
				args.minlen = blen - args.alignment;
			args.minalignslop = 0;
		} else {
			/*
			 * First try an exact bno allocation.
			 * If it fails then do a near or start bno
			 * allocation with alignment turned on.
			 */
			atype = args.type;
			tryagain = 1;
			args.type = XFS_ALLOCTYPE_THIS_BNO;
			args.alignment = 1;
			/*
			 * Compute the minlen+alignment for the
			 * next case.  Set slop so that the value
			 * of minlen+alignment+slop doesn't go up
			 * between the calls.
			 */
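			/*
			 * Worked example (illustrative numbers only):
			 * with stripe_align = 8, args.minlen = 4 and
			 * blen = 0, nextminlen stays 4, and since
			 * 4 + 8 > 4 + 1 we get minalignslop =
			 * 4 + 8 - 4 - 1 = 7, reserving enough slack
			 * that the aligned retry cannot need more
			 * space than this attempt reserved.
			 */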
			if (blen > stripe_align && blen <= args.maxlen)
				nextminlen = blen - stripe_align;
			else
				nextminlen = args.minlen;
			if (nextminlen + stripe_align > args.minlen + 1)
				args.minalignslop =
					nextminlen + stripe_align -
					args.minlen - 1;
			else
				args.minalignslop = 0;
		}
	} else {
		args.alignment = 1;
		args.minalignslop = 0;
	}
	args.minleft = ap->minleft;
	args.wasdel = ap->wasdel;
	args.isfl = 0;
	args.userdata = ap->userdata;
	if ((error = xfs_alloc_vextent(&args)))
		return error;
	if (tryagain && args.fsbno == NULLFSBLOCK) {
		/*
		 * Exact allocation failed. Now try with alignment
		 * turned on.
		 */
		args.type = atype;
		args.fsbno = ap->blkno;
		args.alignment = stripe_align;
		args.minlen = nextminlen;
		args.minalignslop = 0;
		isaligned = 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}
	if (isaligned && args.fsbno == NULLFSBLOCK) {
		/*
		 * Aligned allocation failed, so turn off alignment
		 * and try again.
		 */
		args.type = atype;
		args.fsbno = ap->blkno;
		args.alignment = 0;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}
	if (args.fsbno == NULLFSBLOCK && nullfb &&
	    args.minlen > ap->minlen) {
		args.minlen = ap->minlen;
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = ap->blkno;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}
	if (args.fsbno == NULLFSBLOCK && nullfb) {
		args.fsbno = 0;
		args.type = XFS_ALLOCTYPE_FIRST_AG;
		args.total = ap->minlen;
		args.minleft = 0;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
		ap->flist->xbf_low = 1;
	}
	if (args.fsbno != NULLFSBLOCK) {
		/*
		 * Check that the allocation happened in the same or a
		 * higher AG than the first block that was allocated.
		 */
		ASSERT(*ap->firstblock == NULLFSBLOCK ||
		       XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
		       XFS_FSB_TO_AGNO(mp, args.fsbno) ||
		       (ap->flist->xbf_low &&
			XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
			XFS_FSB_TO_AGNO(mp, args.fsbno)));

		ap->blkno = args.fsbno;
		if (*ap->firstblock == NULLFSBLOCK)
			*ap->firstblock = args.fsbno;
		ASSERT(nullfb || fb_agno == args.agno ||
		       (ap->flist->xbf_low && fb_agno < args.agno));
		ap->length = args.len;
		ap->ip->i_d.di_nblocks += args.len;
		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
		if (ap->wasdel)
			ap->ip->i_delayed_blks -= args.len;
		/*
		 * Adjust the disk quota also. This was reserved
		 * earlier.
		 */
		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
			ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
					XFS_TRANS_DQ_BCOUNT,
			(long) args.len);
	} else {
		ap->blkno = NULLFSBLOCK;
		ap->length = 0;
	}
	return 0;
}

/*
 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
 * It figures out where to ask the underlying allocator to put the new extent.
 */
STATIC int
xfs_bmap_alloc(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata)
		return xfs_bmap_rtalloc(ap);
	return xfs_bmap_btalloc(ap);
}

/*
 * Trim the returned map to the required bounds
 */
STATIC void
xfs_bmapi_trim_map(
	struct xfs_bmbt_irec	*mval,
	struct xfs_bmbt_irec	*got,
	xfs_fileoff_t		*bno,
	xfs_filblks_t		len,
	xfs_fileoff_t		obno,
	xfs_fileoff_t		end,
	int			n,
	int			flags)
{
	if ((flags & XFS_BMAPI_ENTIRE) ||
	    got->br_startoff + got->br_blockcount <= obno) {
		*mval = *got;
		if (isnullstartblock(got->br_startblock))
			mval->br_startblock = DELAYSTARTBLOCK;
		return;
	}

	if (obno > *bno)
		*bno = obno;
	ASSERT((*bno >= obno) || (n == 0));
	ASSERT(*bno < end);
	mval->br_startoff = *bno;
	if (isnullstartblock(got->br_startblock))
		mval->br_startblock = DELAYSTARTBLOCK;
	else
		mval->br_startblock = got->br_startblock +
					(*bno - got->br_startoff);
	/*
	 * Return the minimum of what we got and what we asked for as the
	 * length.  We can use the len variable here because it is modified
	 * below and we could have been there before coming here if the
	 * first part of the allocation didn't overlap what was asked for.
	 */
	mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
			got->br_blockcount - (*bno - got->br_startoff));
	mval->br_state = got->br_state;
	ASSERT(mval->br_blockcount <= len);
}

/*
 * Update and validate the extent map to return
 */
STATIC void
xfs_bmapi_update_map(
	struct xfs_bmbt_irec	**map,
	xfs_fileoff_t		*bno,
	xfs_filblks_t		*len,
	xfs_fileoff_t		obno,
	xfs_fileoff_t		end,
	int			*n,
	int			flags)
{
	xfs_bmbt_irec_t	*mval = *map;

	ASSERT((flags & XFS_BMAPI_ENTIRE) ||
	       ((mval->br_startoff + mval->br_blockcount) <= end));
	ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
	       (mval->br_startoff < obno));

	*bno = mval->br_startoff + mval->br_blockcount;
	*len = end - *bno;
	if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
		/* update previous map with new information */
		ASSERT(mval->br_startblock == mval[-1].br_startblock);
		ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
		ASSERT(mval->br_state == mval[-1].br_state);
		mval[-1].br_blockcount = mval->br_blockcount;
		mval[-1].br_state = mval->br_state;
	} else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
		   mval[-1].br_startblock != DELAYSTARTBLOCK &&
		   mval[-1].br_startblock != HOLESTARTBLOCK &&
		   mval->br_startblock == mval[-1].br_startblock +
					  mval[-1].br_blockcount &&
		   ((flags & XFS_BMAPI_IGSTATE) ||
			mval[-1].br_state == mval->br_state)) {
		ASSERT(mval->br_startoff ==
		       mval[-1].br_startoff + mval[-1].br_blockcount);
		mval[-1].br_blockcount += mval->br_blockcount;
	} else if (*n > 0 &&
		   mval->br_startblock == DELAYSTARTBLOCK &&
		   mval[-1].br_startblock == DELAYSTARTBLOCK &&
		   mval->br_startoff ==
		   mval[-1].br_startoff + mval[-1].br_blockcount) {
		mval[-1].br_blockcount += mval->br_blockcount;
		mval[-1].br_state = mval->br_state;
	} else if (!((*n == 0) &&
		     ((mval->br_startoff + mval->br_blockcount) <=
		      obno))) {
		mval++;
		(*n)++;
	}
	*map = mval;
}

/*
 * Map file blocks to filesystem blocks without allocation.
 */
int
xfs_bmapi_read(
	struct xfs_inode	*ip,
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	struct xfs_bmbt_irec	*mval,
	int			*nmap,
	int			flags)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp;
	struct xfs_bmbt_irec	got;
	struct xfs_bmbt_irec	prev;
	xfs_fileoff_t		obno;
	xfs_fileoff_t		end;
	xfs_extnum_t		lastx;
	int			error;
	int			eof;
	int			n = 0;
	int			whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
						XFS_ATTR_FORK : XFS_DATA_FORK;

	ASSERT(*nmap >= 1);
	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
			   XFS_BMAPI_IGSTATE)));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	XFS_STATS_INC(xs_blk_mapr);

	ifp = XFS_IFORK_PTR(ip, whichfork);

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, ip, whichfork);
		if (error)
			return error;
	}

	xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
	end = bno + len;
	obno = bno;

	while (bno < end && n < *nmap) {
		/* Reading past eof, act as though there's a hole up to end. */
		if (eof)
			got.br_startoff = end;
		if (got.br_startoff > bno) {
			/* Reading in a hole.  */
			mval->br_startoff = bno;
			mval->br_startblock = HOLESTARTBLOCK;
			mval->br_blockcount =
				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
			mval->br_state = XFS_EXT_NORM;
			bno += mval->br_blockcount;
			len -= mval->br_blockcount;
			mval++;
			n++;
			continue;
		}

		/* set up the extent map to return. */
		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);

		/* If we're done, stop now. */
		if (bno >= end || n >= *nmap)
			break;

		/* Else go on to the next record. */
		if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
		else
			eof = 1;
	}
	*nmap = n;
	return 0;
}

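/*
 * Reserve space for a delayed allocation: take transaction-less quota,
 * in-core free-block and (for realtime files) free-rtextent reservations,
 * including worst-case indirect (bmap btree) blocks, then record the new
 * delalloc extent in the incore extent list.
 */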
STATIC int
xfs_bmapi_reserve_delalloc(
	struct xfs_inode	*ip,
	xfs_fileoff_t		aoff,
	xfs_filblks_t		len,
	struct xfs_bmbt_irec	*got,
	struct xfs_bmbt_irec	*prev,
	xfs_extnum_t		*lastx,
	int			eof)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	xfs_extlen_t		alen;
	xfs_extlen_t		indlen;
	char			rt = XFS_IS_REALTIME_INODE(ip);
	xfs_extlen_t		extsz;
	int			error;

	alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
	if (!eof)
		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);

	/* Figure out the extent size, adjust alen */
	extsz = xfs_get_extsz_hint(ip);
	if (extsz) {
		error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
					       1, 0, &aoff, &alen);
		ASSERT(!error);
	}

	if (rt)
		extsz = alen / mp->m_sb.sb_rextsize;

	/*
	 * Make a transaction-less quota reservation for delayed allocation
	 * blocks.  This number gets adjusted later.  We bail out here if
	 * the reservation fails, before we've touched anything else.
	 */
	error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
			rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
	if (error)
		return error;

	/*
	 * Adjust the superblock counters for alen and indlen separately,
	 * since they may be accounted against different pools (realtime
	 * extents vs. regular data blocks).
	 */
	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
	ASSERT(indlen > 0);

	if (rt)
		error = xfs_mod_frextents(mp, -((int64_t)extsz));
	else
		error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
	if (error)
		goto out_unreserve_quota;

	error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
	if (error)
		goto out_unreserve_blocks;

	ip->i_delayed_blks += alen;

	got->br_startoff = aoff;
	got->br_startblock = nullstartblock(indlen);
	got->br_blockcount = alen;
	got->br_state = XFS_EXT_NORM;
	xfs_bmap_add_extent_hole_delay(ip, lastx, got);

	/*
	 * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
	 * might have merged it into one of the neighbouring ones.
	 */
	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);

	ASSERT(got->br_startoff <= aoff);
	ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
	ASSERT(isnullstartblock(got->br_startblock));
	ASSERT(got->br_state == XFS_EXT_NORM);
	return 0;

out_unreserve_blocks:
	if (rt)
		xfs_mod_frextents(mp, extsz);
	else
		xfs_mod_fdblocks(mp, alen, false);
out_unreserve_quota:
	if (XFS_IS_QUOTA_ON(mp))
		xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
	return error;
}

/*
 * Map file blocks to filesystem blocks, adding delayed allocations as needed.
 */
int
xfs_bmapi_delay(
	struct xfs_inode	*ip,	/* incore inode */
	xfs_fileoff_t		bno,	/* starting file offs. mapped */
	xfs_filblks_t		len,	/* length to map in file */
	struct xfs_bmbt_irec	*mval,	/* output: map values */
	int			*nmap,	/* i/o: mval size/count */
	int			flags)	/* XFS_BMAPI_... */
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	struct xfs_bmbt_irec	got;	/* current file extent record */
	struct xfs_bmbt_irec	prev;	/* previous file extent record */
	xfs_fileoff_t		obno;	/* old block number (offset) */
	xfs_fileoff_t		end;	/* end of mapped file region */
	xfs_extnum_t		lastx;	/* last useful extent number */
	int			eof;	/* we've hit the end of extents */
	int			n = 0;	/* current extent index */
	int			error = 0;

	ASSERT(*nmap >= 1);
	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
	ASSERT(!(flags & ~XFS_BMAPI_ENTIRE));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	XFS_STATS_INC(xs_blk_mapw);

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
		if (error)
			return error;
	}

	xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev);
	end = bno + len;
	obno = bno;

	while (bno < end && n < *nmap) {
		if (eof || got.br_startoff > bno) {
			error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got,
							   &prev, &lastx, eof);
			if (error) {
				if (n == 0) {
					*nmap = 0;
					return error;
				}
				break;
			}
		}

		/* set up the extent map to return. */
		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);

		/* If we're done, stop now. */
		if (bno >= end || n >= *nmap)
			break;

		/* Else go on to the next record. */
		prev = got;
		if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
		else
			eof = 1;
	}

	*nmap = n;
	return 0;
}

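/*
 * Convert a delayed allocation, or fill a hole, with real blocks: size the
 * request, call the allocator via xfs_bmap_alloc(), then fold the new
 * blocks into the extent list and (if present) the bmap btree.
 */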
static int
xfs_bmapi_allocate(
	struct xfs_bmalloca	*bma)
{
	struct xfs_mount	*mp = bma->ip->i_mount;
	int			whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
						XFS_ATTR_FORK : XFS_DATA_FORK;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
	int			tmp_logflags = 0;
	int			error;

	ASSERT(bma->length > 0);

	/*
	 * For the wasdelay case, we could also just allocate the stuff asked
	 * for in this bmap call but that wouldn't be as good.
	 */
	if (bma->wasdel) {
		bma->length = (xfs_extlen_t)bma->got.br_blockcount;
		bma->offset = bma->got.br_startoff;
		if (bma->idx != NULLEXTNUM && bma->idx) {
			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
					 &bma->prev);
		}
	} else {
		bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
		if (!bma->eof)
			bma->length = XFS_FILBLKS_MIN(bma->length,
					bma->got.br_startoff - bma->offset);
	}

	/*
	 * Indicate if this is the first user data in the file, or just any
	 * user data.
	 */
	if (!(bma->flags & XFS_BMAPI_METADATA)) {
		bma->userdata = (bma->offset == 0) ?
			XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
	}

	bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;

	/*
	 * Only want to do the alignment at the eof if it is userdata and
	 * allocation length is larger than a stripe unit.
	 */
	if (mp->m_dalign && bma->length >= mp->m_dalign &&
	    !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
		error = xfs_bmap_isaeof(bma, whichfork);
		if (error)
			return error;
	}

	error = xfs_bmap_alloc(bma);
	if (error)
		return error;

	if (bma->flist->xbf_low)
		bma->minleft = 0;
	if (bma->cur)
		bma->cur->bc_private.b.firstblock = *bma->firstblock;
	if (bma->blkno == NULLFSBLOCK)
		return 0;
	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
		bma->cur->bc_private.b.firstblock = *bma->firstblock;
		bma->cur->bc_private.b.flist = bma->flist;
	}
	/*
	 * Bump the number of extents we've allocated
	 * in this call.
	 */
	bma->nallocs++;

	if (bma->cur)
		bma->cur->bc_private.b.flags =
			bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;

	bma->got.br_startoff = bma->offset;
	bma->got.br_startblock = bma->blkno;
	bma->got.br_blockcount = bma->length;
	bma->got.br_state = XFS_EXT_NORM;

	/*
	 * A wasdelay extent has been initialized, so shouldn't be flagged
	 * as unwritten.
	 */
	if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
	    xfs_sb_version_hasextflgbit(&mp->m_sb))
		bma->got.br_state = XFS_EXT_UNWRITTEN;

	if (bma->wasdel)
		error = xfs_bmap_add_extent_delay_real(bma);
	else
		error = xfs_bmap_add_extent_hole_real(bma, whichfork);

	bma->logflags |= tmp_logflags;
	if (error)
		return error;

	/*
	 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
	 * or xfs_bmap_add_extent_hole_real might have merged it into one of
	 * the neighbouring ones.
	 */
	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);

	ASSERT(bma->got.br_startoff <= bma->offset);
	ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
	       bma->offset + bma->length);
	ASSERT(bma->got.br_state == XFS_EXT_NORM ||
	       bma->got.br_state == XFS_EXT_UNWRITTEN);
	return 0;
}

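/*
 * Convert the extent state of a mapped extent between written and
 * unwritten, as dictated by the XFS_BMAPI_PREALLOC and XFS_BMAPI_CONVERT
 * flags.  Returns -EAGAIN if only part of the request was converted and
 * the caller should come back for the rest.
 */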
STATIC int
xfs_bmapi_convert_unwritten(
	struct xfs_bmalloca	*bma,
	struct xfs_bmbt_irec	*mval,
	xfs_filblks_t		len,
	int			flags)
{
	int			whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
						XFS_ATTR_FORK : XFS_DATA_FORK;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
	int			tmp_logflags = 0;
	int			error;

	/* check if we need to do unwritten->real conversion */
	if (mval->br_state == XFS_EXT_UNWRITTEN &&
	    (flags & XFS_BMAPI_PREALLOC))
		return 0;

	/* check if we need to do real->unwritten conversion */
	if (mval->br_state == XFS_EXT_NORM &&
	    (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
			(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
		return 0;

	/*
	 * Modify (by adding) the state flag, if writing.
	 */
	ASSERT(mval->br_blockcount <= len);
	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
					bma->ip, whichfork);
		bma->cur->bc_private.b.firstblock = *bma->firstblock;
		bma->cur->bc_private.b.flist = bma->flist;
	}
	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;

	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
			&bma->cur, mval, bma->firstblock, bma->flist,
			&tmp_logflags);
	bma->logflags |= tmp_logflags;
	if (error)
		return error;

	/*
	 * Update our extent pointer, given that
	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
	 * of the neighbouring ones.
	 */
	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);

	/*
	 * We may have combined previously unwritten space with written space,
	 * so generate another request.
	 */
	if (mval->br_blockcount < len)
		return -EAGAIN;
	return 0;
}

/*
 * Map file blocks to filesystem blocks, and allocate blocks or convert the
 * extent state if necessary.  Detailed behaviour is controlled by the flags
 * parameter.  Only allocates blocks from a single allocation group, to avoid
 * locking problems.
 *
 * The returned value in "firstblock" from the first call in a transaction
 * must be remembered and presented to subsequent calls in "firstblock".
 * An upper bound for the number of blocks to be allocated is supplied to
 * the first call in "total"; if no allocation group has that many free
 * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
 */
int
xfs_bmapi_write(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		bno,		/* starting file offs. mapped */
	xfs_filblks_t		len,		/* length to map in file */
	int			flags,		/* XFS_BMAPI_... */
	xfs_fsblock_t		*firstblock,	/* first allocated block
						   controls a.g. for allocs */
	xfs_extlen_t		total,		/* total blocks needed */
	struct xfs_bmbt_irec	*mval,		/* output: map values */
	int			*nmap,		/* i/o: mval size/count */
	struct xfs_bmap_free	*flist)		/* i/o: list extents to free */
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp;
	struct xfs_bmalloca	bma = { NULL };	/* args for xfs_bmap_alloc */
	xfs_fileoff_t		end;		/* end of mapped file region */
	int			eof;		/* after the end of extents */
	int			error;		/* error return */
	int			n;		/* current extent index */
	xfs_fileoff_t		obno;		/* old block number (offset) */
	int			whichfork;	/* data or attr fork */
	char			inhole;		/* current location is hole in file */
	char			wasdelay;	/* old extent was delayed */

#ifdef DEBUG
	xfs_fileoff_t		orig_bno;	/* original block number value */
	int			orig_flags;	/* original flags arg value */
	xfs_filblks_t		orig_len;	/* original value of len arg */
	struct xfs_bmbt_irec	*orig_mval;	/* original value of mval */
	int			orig_nmap;	/* original value of *nmap */

	orig_bno = bno;
	orig_len = len;
	orig_flags = flags;
	orig_mval = mval;
	orig_nmap = *nmap;
#endif
	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
		XFS_ATTR_FORK : XFS_DATA_FORK;

	ASSERT(*nmap >= 1);
	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
	ASSERT(!(flags & XFS_BMAPI_IGSTATE));
	ASSERT(tp != NULL);
	ASSERT(len > 0);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ifp = XFS_IFORK_PTR(ip, whichfork);

	XFS_STATS_INC(xs_blk_mapw);

	if (*firstblock == NULLFSBLOCK) {
		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
			bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
		else
			bma.minleft = 1;
	} else {
		bma.minleft = 0;
	}

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			goto error0;
	}

	xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got,
				&bma.prev);
	n = 0;
	end = bno + len;
	obno = bno;

	bma.tp = tp;
	bma.ip = ip;
	bma.total = total;
	bma.userdata = 0;
	bma.flist = flist;
	bma.firstblock = firstblock;

	while (bno < end && n < *nmap) {
		inhole = eof || bma.got.br_startoff > bno;
		wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);

		/*
		 * First, deal with the hole before the allocated space
		 * that we found, if any.
		 */
		if (inhole || wasdelay) {
			bma.eof = eof;
			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
			bma.wasdel = wasdelay;
			bma.offset = bno;
			bma.flags = flags;

			/*
			 * There's a 32/64 bit type mismatch between the
			 * allocation length request (which can be 64 bits in
			 * length) and the bma length request, which is
			 * xfs_extlen_t and therefore 32 bits. Hence we have to
			 * check for 32-bit overflows and handle them here.
			 */
			if (len > (xfs_filblks_t)MAXEXTLEN)
				bma.length = MAXEXTLEN;
			else
				bma.length = len;

			ASSERT(len > 0);
			ASSERT(bma.length > 0);
			error = xfs_bmapi_allocate(&bma);
			if (error)
				goto error0;
			if (bma.blkno == NULLFSBLOCK)
				break;
		}

		/* Deal with the allocated space we found.  */
		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
							end, n, flags);

		/* Execute unwritten extent conversion if necessary */
		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
		if (error == -EAGAIN)
			continue;
		if (error)
			goto error0;

		/* update the extent map to return */
		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);

		/*
		 * If we're done, stop now.  Stop when we've allocated
		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
		 * the transaction may get too big.
		 */
		if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
			break;

		/* Else go on to the next record. */
		bma.prev = bma.got;
		if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
					 &bma.got);
		} else
			eof = 1;
	}
	*nmap = n;

	/*
	 * Transform from btree to extents, give it cur.
	 */
	if (xfs_bmap_wants_extents(ip, whichfork)) {
		int		tmp_logflags = 0;

		ASSERT(bma.cur);
		error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
			&tmp_logflags, whichfork);
		bma.logflags |= tmp_logflags;
		if (error)
			goto error0;
	}

	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
	       XFS_IFORK_NEXTENTS(ip, whichfork) >
		XFS_IFORK_MAXEXT(ip, whichfork));
	error = 0;
error0:
	/*
	 * Log everything.  Do this after conversion, there's no point in
	 * logging the extent records if we've converted to btree format.
	 */
	if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		bma.logflags &= ~xfs_ilog_fext(whichfork);
	else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
		bma.logflags &= ~xfs_ilog_fbroot(whichfork);
	/*
	 * Log whatever the flags say, even if error.  Otherwise we might miss
	 * detecting a case where the data is changed, there's an error,
	 * and it's not logged so we don't shutdown when we should.
	 */
	if (bma.logflags)
		xfs_trans_log_inode(tp, ip, bma.logflags);

	if (bma.cur) {
		if (!error) {
			ASSERT(*firstblock == NULLFSBLOCK ||
			       XFS_FSB_TO_AGNO(mp, *firstblock) ==
			       XFS_FSB_TO_AGNO(mp,
				       bma.cur->bc_private.b.firstblock) ||
			       (flist->xbf_low &&
				XFS_FSB_TO_AGNO(mp, *firstblock) <
				XFS_FSB_TO_AGNO(mp,
					bma.cur->bc_private.b.firstblock)));
			*firstblock = bma.cur->bc_private.b.firstblock;
		}
		xfs_btree_del_cursor(bma.cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}
	if (!error)
		xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
			orig_nmap, *nmap);
	return error;
}

/*
 * Called by xfs_bmapi to update file extent records and the btree
 * after removing space (or undoing a delayed allocation).
 */
STATIC int				/* error */
xfs_bmap_del_extent(
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_trans_t		*tp,	/* current transaction pointer */
	xfs_extnum_t		*idx,	/* extent number to update/delete */
	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
	xfs_btree_cur_t		*cur,	/* if null, not a btree */
	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
	int			*logflagsp, /* inode logging flags */
	int			whichfork) /* data or attr fork */
{
	xfs_filblks_t		da_new;	/* new delay-alloc indirect blocks */
	xfs_filblks_t		da_old;	/* old delay-alloc indirect blocks */
	xfs_fsblock_t		del_endblock = 0; /* first block past del */
	xfs_fileoff_t		del_endoff;	/* first offset past del */
	int			delay;	/* current block is delayed allocated */
	int			do_fx;	/* free extent at end of routine */
	xfs_bmbt_rec_host_t	*ep;	/* current extent entry pointer */
	int			error;	/* error return value */
	int			flags;	/* inode logging flags */
	xfs_bmbt_irec_t		got;	/* current extent entry */
	xfs_fileoff_t		got_endoff;	/* first offset past got */
	int			i;	/* temp state */
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_mount_t		*mp;	/* mount structure */
	xfs_filblks_t		nblks;	/* quota/sb block count */
	xfs_bmbt_irec_t		new;	/* new record to be inserted */
	/* REFERENCED */
	uint			qfield;	/* quota field to update */
	xfs_filblks_t		temp;	/* for indirect length calculations */
	xfs_filblks_t		temp2;	/* for indirect length calculations */
	int			state = 0;

	XFS_STATS_INC(xs_del_exlist);

	if (whichfork == XFS_ATTR_FORK)
		state |= BMAP_ATTRFORK;

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
		(uint)sizeof(xfs_bmbt_rec_t)));
	ASSERT(del->br_blockcount > 0);
	ep = xfs_iext_get_ext(ifp, *idx);
	xfs_bmbt_get_all(ep, &got);
	ASSERT(got.br_startoff <= del->br_startoff);
	del_endoff = del->br_startoff + del->br_blockcount;
	got_endoff = got.br_startoff + got.br_blockcount;
	ASSERT(got_endoff >= del_endoff);
	delay = isnullstartblock(got.br_startblock);
	ASSERT(isnullstartblock(del->br_startblock) == delay);
	flags = 0;
	qfield = 0;
	error = 0;
	/*
	 * If deleting a real allocation, must free up the disk space.
	 */
	if (!delay) {
		flags = XFS_ILOG_CORE;
		/*
		 * Realtime allocation.  Free it and record di_nblocks update.
		 */
		if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
			xfs_fsblock_t	bno;
			xfs_filblks_t	len;

			ASSERT(do_mod(del->br_blockcount,
				      mp->m_sb.sb_rextsize) == 0);
			ASSERT(do_mod(del->br_startblock,
				      mp->m_sb.sb_rextsize) == 0);
			bno = del->br_startblock;
			len = del->br_blockcount;
			do_div(bno, mp->m_sb.sb_rextsize);
			do_div(len, mp->m_sb.sb_rextsize);
			error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
			if (error)
				goto done;
			do_fx = 0;
			nblks = len * mp->m_sb.sb_rextsize;
			qfield = XFS_TRANS_DQ_RTBCOUNT;
		}
		/*
		 * Ordinary allocation.
		 */
		else {
			do_fx = 1;
			nblks = del->br_blockcount;
			qfield = XFS_TRANS_DQ_BCOUNT;
		}
		/*
		 * Set up del_endblock and cur for later.
		 */
		del_endblock = del->br_startblock + del->br_blockcount;
		if (cur) {
			if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
					got.br_startblock, got.br_blockcount,
					&i)))
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}
		da_old = da_new = 0;
	} else {
		da_old = startblockval(got.br_startblock);
		da_new = 0;
		nblks = 0;
		do_fx = 0;
	}
	/*
	 * Set the flag value to use in the switch statement below.
	 * Bit 1 (value 2) is set if del starts at the start of the extent;
	 * bit 0 (value 1) is set if del ends at the end of the extent.
	 */
	switch (((got.br_startoff == del->br_startoff) << 1) |
		(got_endoff == del_endoff)) {
	case 3:
		/*
		 * Matches the whole extent.  Delete the entry.
		 */
		xfs_iext_remove(ip, *idx, 1,
				whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
		--*idx;
		if (delay)
			break;

		XFS_IFORK_NEXT_SET(ip, whichfork,
			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
		flags |= XFS_ILOG_CORE;
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_btree_delete(cur, &i)))
			goto done;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		break;

	case 2:
		/*
		 * Deleting the first part of the extent.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_startoff(ep, del_endoff);
		temp = got.br_blockcount - del->br_blockcount;
		xfs_bmbt_set_blockcount(ep, temp);
		if (delay) {
			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
				da_old);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
			da_new = temp;
			break;
		}
		xfs_bmbt_set_startblock(ep, del_endblock);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
				got.br_blockcount - del->br_blockcount,
				got.br_state)))
			goto done;
		break;

	case 1:
		/*
		 * Deleting the last part of the extent.
		 */
		temp = got.br_blockcount - del->br_blockcount;
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		if (delay) {
			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
				da_old);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
			da_new = temp;
			break;
		}
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		if (!cur) {
			flags |= xfs_ilog_fext(whichfork);
			break;
		}
		if ((error = xfs_bmbt_update(cur, got.br_startoff,
				got.br_startblock,
				got.br_blockcount - del->br_blockcount,
				got.br_state)))
			goto done;
		break;

	case 0:
		/*
		 * Deleting the middle of the extent.
		 */
		temp = del->br_startoff - got.br_startoff;
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(ep, temp);
		new.br_startoff = del_endoff;
		temp2 = got_endoff - del_endoff;
		new.br_blockcount = temp2;
		new.br_state = got.br_state;
		if (!delay) {
			new.br_startblock = del_endblock;
			flags |= XFS_ILOG_CORE;
			if (cur) {
				if ((error = xfs_bmbt_update(cur,
						got.br_startoff,
						got.br_startblock, temp,
						got.br_state)))
					goto done;
				if ((error = xfs_btree_increment(cur, 0, &i)))
					goto done;
				cur->bc_rec.b = new;
				error = xfs_btree_insert(cur, &i);
				if (error && error != -ENOSPC)
					goto done;
				/*
				 * If we got no-space back from the btree
				 * insert, it tried a split, and we have a
				 * zero block reservation.
				 * Fix up our state and return the error.
				 */
				if (error == -ENOSPC) {
					/*
					 * Reset the cursor, don't trust
					 * it after any insert operation.
					 */
					if ((error = xfs_bmbt_lookup_eq(cur,
							got.br_startoff,
							got.br_startblock,
							temp, &i)))
						goto done;
					XFS_WANT_CORRUPTED_GOTO(mp,
								i == 1, done);
					/*
					 * Update the btree record back
					 * to the original value.
					 */
					if ((error = xfs_bmbt_update(cur,
							got.br_startoff,
							got.br_startblock,
							got.br_blockcount,
							got.br_state)))
						goto done;
					/*
					 * Reset the extent record back
					 * to the original value.
					 */
					xfs_bmbt_set_blockcount(ep,
						got.br_blockcount);
					flags = 0;
					error = -ENOSPC;
					goto done;
				}
				XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			} else
				flags |= xfs_ilog_fext(whichfork);
			XFS_IFORK_NEXT_SET(ip, whichfork,
				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
		} else {
			ASSERT(whichfork == XFS_DATA_FORK);
			temp = xfs_bmap_worst_indlen(ip, temp);
			xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
			temp2 = xfs_bmap_worst_indlen(ip, temp2);
			new.br_startblock = nullstartblock((int)temp2);
			da_new = temp + temp2;
			while (da_new > da_old) {
				if (temp) {
					temp--;
					da_new--;
					xfs_bmbt_set_startblock(ep,
						nullstartblock((int)temp));
				}
				if (da_new == da_old)
					break;
				if (temp2) {
					temp2--;
					da_new--;
					new.br_startblock =
						nullstartblock((int)temp2);
				}
			}
		}
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		xfs_iext_insert(ip, *idx + 1, 1, &new, state);
		++*idx;
		break;
	}
	/*
	 * If we need to, add to the list of extents to delete.
	 */
	if (do_fx)
		xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
			mp);
	/*
	 * Adjust the inode's block count.
	 */
	if (nblks)
		ip->i_d.di_nblocks -= nblks;
	/*
	 * Adjust quota data.
	 */
	if (qfield)
		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);

	/*
	 * Account for the change in delayed indirect blocks.
	 * Nothing to do for disk quota accounting here.
	 */
	ASSERT(da_old >= da_new);
	if (da_old > da_new)
		xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
done:
	*logflagsp = flags;
	return error;
}

4998
4999/*
5000 * Unmap (remove) blocks from a file.
5001 * If nexts is nonzero then the number of extents to remove is limited to
5002 * that value.  If not all extents in the block range can be removed then
5003 * *done is set.
5004 */
5005int						/* error */
5006xfs_bunmapi(
5007	xfs_trans_t		*tp,		/* transaction pointer */
5008	struct xfs_inode	*ip,		/* incore inode */
5009	xfs_fileoff_t		bno,		/* starting offset to unmap */
5010	xfs_filblks_t		len,		/* length to unmap in file */
5011	int			flags,		/* misc flags */
5012	xfs_extnum_t		nexts,		/* number of extents max */
5013	xfs_fsblock_t		*firstblock,	/* first allocated block
5014						   controls a.g. for allocs */
5015	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
5016	int			*done)		/* set if not done yet */
5017{
5018	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
5019	xfs_bmbt_irec_t		del;		/* extent being deleted */
5020	int			eof;		/* is deleting at eof */
5021	xfs_bmbt_rec_host_t	*ep;		/* extent record pointer */
5022	int			error;		/* error return value */
5023	xfs_extnum_t		extno;		/* extent number in list */
5024	xfs_bmbt_irec_t		got;		/* current extent record */
5025	xfs_ifork_t		*ifp;		/* inode fork pointer */
5026	int			isrt;		/* freeing in rt area */
5027	xfs_extnum_t		lastx;		/* last extent index used */
5028	int			logflags;	/* transaction logging flags */
5029	xfs_extlen_t		mod;		/* rt extent offset */
5030	xfs_mount_t		*mp;		/* mount structure */
5031	xfs_extnum_t		nextents;	/* number of file extents */
5032	xfs_bmbt_irec_t		prev;		/* previous extent record */
5033	xfs_fileoff_t		start;		/* first file offset deleted */
5034	int			tmp_logflags;	/* partial logging flags */
5035	int			wasdel;		/* was a delayed alloc extent */
5036	int			whichfork;	/* data or attribute fork */
5037	xfs_fsblock_t		sum;
5038
5039	trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
5040
5041	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
5042		XFS_ATTR_FORK : XFS_DATA_FORK;
5043	ifp = XFS_IFORK_PTR(ip, whichfork);
5044	if (unlikely(
5045	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5046	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
5047		XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
5048				 ip->i_mount);
5049		return -EFSCORRUPTED;
5050	}
5051	mp = ip->i_mount;
5052	if (XFS_FORCED_SHUTDOWN(mp))
5053		return -EIO;
5054
5055	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5056	ASSERT(len > 0);
5057	ASSERT(nexts >= 0);
5058
5059	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5060	    (error = xfs_iread_extents(tp, ip, whichfork)))
5061		return error;
5062	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
5063	if (nextents == 0) {
5064		*done = 1;
5065		return 0;
5066	}
5067	XFS_STATS_INC(xs_blk_unmap);
5068	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5069	start = bno;
5070	bno = start + len - 1;
5071	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
5072		&prev);
5073
5074	/*
5075	 * Check to see if the given block number is past the end of the
5076	 * file, back up to the last block if so...
5077	 */
5078	if (eof) {
5079		ep = xfs_iext_get_ext(ifp, --lastx);
5080		xfs_bmbt_get_all(ep, &got);
5081		bno = got.br_startoff + got.br_blockcount - 1;
5082	}
5083	logflags = 0;
5084	if (ifp->if_flags & XFS_IFBROOT) {
5085		ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
5086		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5087		cur->bc_private.b.firstblock = *firstblock;
5088		cur->bc_private.b.flist = flist;
5089		cur->bc_private.b.flags = 0;
5090	} else
5091		cur = NULL;
5092
5093	if (isrt) {
5094		/*
5095		 * Synchronize by locking the bitmap inode.
5096		 */
5097		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
5098		xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5099	}
5100
5101	extno = 0;
5102	while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
5103	       (nexts == 0 || extno < nexts)) {
5104		/*
5105		 * Is the found extent after a hole in which bno lives?
5106		 * Just back up to the previous extent, if so.
5107		 */
5108		if (got.br_startoff > bno) {
5109			if (--lastx < 0)
5110				break;
5111			ep = xfs_iext_get_ext(ifp, lastx);
5112			xfs_bmbt_get_all(ep, &got);
5113		}
5114		/*
5115		 * Is the last block of this extent before the range
5116		 * we're supposed to delete?  If so, we're done.
5117		 */
5118		bno = XFS_FILEOFF_MIN(bno,
5119			got.br_startoff + got.br_blockcount - 1);
5120		if (bno < start)
5121			break;
5122		/*
5123		 * Then deal with the (possibly delayed) allocated space
5124		 * we found.
5125		 */
5126		ASSERT(ep != NULL);
5127		del = got;
5128		wasdel = isnullstartblock(del.br_startblock);
5129		if (got.br_startoff < start) {
5130			del.br_startoff = start;
5131			del.br_blockcount -= start - got.br_startoff;
5132			if (!wasdel)
5133				del.br_startblock += start - got.br_startoff;
5134		}
5135		if (del.br_startoff + del.br_blockcount > bno + 1)
5136			del.br_blockcount = bno + 1 - del.br_startoff;
		sum = del.br_startblock + del.br_blockcount;
		if (isrt &&
		    (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
			/*
			 * Realtime extent not lined up at the end.
			 * The extent could have been split into written
			 * and unwritten pieces, or we could just be
			 * unmapping part of it.  But we can't really
			 * get rid of part of a realtime extent.
			 */
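			/*
			 * E.g. (hypothetical numbers): with sb_rextsize = 4
			 * and del ending at startblock + blockcount = 22,
			 * mod = 2, so the deletion ends two blocks past a
			 * realtime extent boundary and needs special casing.
			 */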
			if (del.br_state == XFS_EXT_UNWRITTEN ||
			    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
				/*
				 * This piece is unwritten, or we're not
				 * using unwritten extents.  Skip over it.
				 */
				ASSERT(bno >= mod);
				bno -= mod > del.br_blockcount ?
					del.br_blockcount : mod;
				if (bno < got.br_startoff) {
					if (--lastx >= 0)
						xfs_bmbt_get_all(xfs_iext_get_ext(
							ifp, lastx), &got);
				}
				continue;
			}
			/*
			 * It's written, turn it unwritten.
			 * This is better than zeroing it.
			 */
			ASSERT(del.br_state == XFS_EXT_NORM);
			ASSERT(xfs_trans_get_block_res(tp) > 0);
			/*
			 * If this spans a realtime extent boundary,
			 * chop it back to the start of the one we end at.
			 */
			if (del.br_blockcount > mod) {
				del.br_startoff += del.br_blockcount - mod;
				del.br_startblock += del.br_blockcount - mod;
				del.br_blockcount = mod;
			}
			del.br_state = XFS_EXT_UNWRITTEN;
			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
					&lastx, &cur, &del, firstblock, flist,
					&logflags);
			if (error)
				goto error0;
			goto nodelete;
		}
		if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
			/*
			 * Realtime extent is lined up at the end but not
			 * at the front.  We'll get rid of full extents if
			 * we can.
			 */
			mod = mp->m_sb.sb_rextsize - mod;
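			/*
			 * mod now counts the blocks from del.br_startblock
			 * up to the next realtime extent boundary.
			 */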
			if (del.br_blockcount > mod) {
				del.br_blockcount -= mod;
				del.br_startoff += mod;
				del.br_startblock += mod;
			} else if ((del.br_startoff == start &&
				    (del.br_state == XFS_EXT_UNWRITTEN ||
				     xfs_trans_get_block_res(tp) == 0)) ||
				   !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
				/*
				 * Can't make it unwritten.  There isn't
				 * a full extent here so just skip it.
				 */
				ASSERT(bno >= del.br_blockcount);
				bno -= del.br_blockcount;
				if (got.br_startoff > bno) {
					if (--lastx >= 0) {
						ep = xfs_iext_get_ext(ifp,
								      lastx);
						xfs_bmbt_get_all(ep, &got);
					}
				}
				continue;
			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
				/*
				 * This one is already unwritten.
				 * It must have a written left neighbor.
				 * Unwrite the killed part of that one and
				 * try again.
				 */
				ASSERT(lastx > 0);
				xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
						lastx - 1), &prev);
				ASSERT(prev.br_state == XFS_EXT_NORM);
				ASSERT(!isnullstartblock(prev.br_startblock));
				ASSERT(del.br_startblock ==
				       prev.br_startblock + prev.br_blockcount);
				if (prev.br_startoff < start) {
					mod = start - prev.br_startoff;
					prev.br_blockcount -= mod;
					prev.br_startblock += mod;
					prev.br_startoff = start;
				}
				prev.br_state = XFS_EXT_UNWRITTEN;
				lastx--;
				error = xfs_bmap_add_extent_unwritten_real(tp,
						ip, &lastx, &cur, &prev,
						firstblock, flist, &logflags);
				if (error)
					goto error0;
				goto nodelete;
			} else {
				ASSERT(del.br_state == XFS_EXT_NORM);
				del.br_state = XFS_EXT_UNWRITTEN;
				error = xfs_bmap_add_extent_unwritten_real(tp,
						ip, &lastx, &cur, &del,
						firstblock, flist, &logflags);
				if (error)
					goto error0;
				goto nodelete;
			}
		}
		if (wasdel) {
			ASSERT(startblockval(del.br_startblock) > 0);
			/* Update realtime/data freespace, unreserve quota */
			if (isrt) {
				xfs_filblks_t rtexts;

				rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
				do_div(rtexts, mp->m_sb.sb_rextsize);
				xfs_mod_frextents(mp, (int64_t)rtexts);
				(void)xfs_trans_reserve_quota_nblks(NULL,
					ip, -((long)del.br_blockcount), 0,
					XFS_QMOPT_RES_RTBLKS);
			} else {
				xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
						 false);
				(void)xfs_trans_reserve_quota_nblks(NULL,
					ip, -((long)del.br_blockcount), 0,
					XFS_QMOPT_RES_REGBLKS);
			}
			ip->i_delayed_blks -= del.br_blockcount;
			if (cur)
				cur->bc_private.b.flags |=
					XFS_BTCUR_BPRV_WASDEL;
		} else if (cur)
			cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
		/*
		 * If the directory code is running with no block reservation,
		 * the deleted block is in the middle of its extent, and the
		 * resulting insert of an extent would cause a transformation
		 * to btree format, then reject it.  The calling code will
		 * then swap blocks around instead.  We have to do this now,
		 * rather than waiting for the conversion to btree format,
		 * since the transaction will be dirty by then.
		 */
		if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
		    XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
		    XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
			XFS_IFORK_MAXEXT(ip, whichfork) &&
		    del.br_startoff > got.br_startoff &&
		    del.br_startoff + del.br_blockcount <
		    got.br_startoff + got.br_blockcount) {
			error = -ENOSPC;
			goto error0;
		}
		error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
				&tmp_logflags, whichfork);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
		bno = del.br_startoff - 1;
nodelete:
		/*
		 * If not done go on to the next (previous) record.
		 */
		if (bno != (xfs_fileoff_t)-1 && bno >= start) {
			if (lastx >= 0) {
				ep = xfs_iext_get_ext(ifp, lastx);
				if (xfs_bmbt_get_startoff(ep) > bno) {
					if (--lastx >= 0)
						ep = xfs_iext_get_ext(ifp,
								      lastx);
				}
				xfs_bmbt_get_all(ep, &got);
			}
			extno++;
		}
	}
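	/*
	 * We are done when the remaining range is exhausted (bno wrapped to
	 * -1 or fell below start) or when we ran off the front of the extent
	 * list (lastx < 0).
	 */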
	*done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;

	/*
	 * Convert to a btree if necessary.
	 */
	if (xfs_bmap_needs_btree(ip, whichfork)) {
		ASSERT(cur == NULL);
		error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
			&cur, 0, &tmp_logflags, whichfork);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
	}
	/*
	 * Convert from btree to extents, handing the cursor to the conversion.
	 */
	else if (xfs_bmap_wants_extents(ip, whichfork)) {
		ASSERT(cur != NULL);
		error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
			whichfork);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
	}
	/*
	 * No conversion from extents to local format is done here.
	 */
	error = 0;
error0:
	/*
	 * Log everything.  Do this after conversion; there's no point in
	 * logging the extent records if we've converted to btree format.
	 */
	if ((logflags & xfs_ilog_fext(whichfork)) &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		logflags &= ~xfs_ilog_fext(whichfork);
	else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
		logflags &= ~xfs_ilog_fbroot(whichfork);
	/*
	 * Log the inode even in the error case; if the transaction
	 * is dirty we'll need to shut down the filesystem.
	 */
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (cur) {
		if (!error) {
			*firstblock = cur->bc_private.b.firstblock;
			cur->bc_private.b.allocated = 0;
		}
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}
	return error;
}

/*
 * Determine whether an extent shift can be accomplished by a merge with the
 * extent that precedes the target hole of the shift.
 */
STATIC bool
xfs_bmse_can_merge(
	struct xfs_bmbt_irec	*left,	/* preceding extent */
	struct xfs_bmbt_irec	*got,	/* current extent to shift */
	xfs_fileoff_t		shift)	/* shift fsb */
{
	xfs_fileoff_t		startoff;

	startoff = got->br_startoff - shift;

	/*
	 * The extent, once shifted, must be adjacent in-file and on-disk with
	 * the preceding extent.
	 */
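	/*
	 * E.g. (hypothetical numbers): left covers file offsets [0, 5) at
	 * disk blocks [100, 105) and got covers [7, 10) at [105, 108).
	 * A shift of 2 gives startoff = 5, which lines up both in-file
	 * (0 + 5 == 5) and on-disk (100 + 5 == 105), so the two extents
	 * can be merged instead of shifted.
	 */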
	if ((left->br_startoff + left->br_blockcount != startoff) ||
	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
	    (left->br_state != got->br_state) ||
	    (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
		return false;

	return true;
}

/*
 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
 * hole in the file. If an extent shift would result in the extent being fully
 * adjacent to the extent that currently precedes the hole, we can merge with
 * the preceding extent rather than do the shift.
 *
 * This function assumes the caller has verified a shift-by-merge is possible
 * with the provided extents via xfs_bmse_can_merge().
 */
STATIC int
xfs_bmse_merge(
	struct xfs_inode		*ip,
	int				whichfork,
	xfs_fileoff_t			shift,		/* shift fsb */
	int				current_ext,	/* idx of gotp */
	struct xfs_bmbt_rec_host	*gotp,		/* extent to shift */
	struct xfs_bmbt_rec_host	*leftp,		/* preceding extent */
	struct xfs_btree_cur		*cur,
	int				*logflags)	/* output */
{
	struct xfs_bmbt_irec		got;
	struct xfs_bmbt_irec		left;
	xfs_filblks_t			blockcount;
	int				error, i;
	struct xfs_mount		*mp = ip->i_mount;

	xfs_bmbt_get_all(gotp, &got);
	xfs_bmbt_get_all(leftp, &left);
	blockcount = left.br_blockcount + got.br_blockcount;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(xfs_bmse_can_merge(&left, &got, shift));

	/*
	 * Merge the in-core extents. Note that the host record pointers and
	 * current_ext index are invalid once the extent has been removed via
	 * xfs_iext_remove().
	 */
	xfs_bmbt_set_blockcount(leftp, blockcount);
	xfs_iext_remove(ip, current_ext, 1, 0);

	/*
	 * Update the on-disk extent count, the btree if necessary and log the
	 * inode.
	 */
	XFS_IFORK_NEXT_SET(ip, whichfork,
			   XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
	*logflags |= XFS_ILOG_CORE;
	if (!cur) {
		*logflags |= XFS_ILOG_DEXT;
		return 0;
	}

	/* lookup and remove the extent to merge */
	error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
				   got.br_blockcount, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	error = xfs_btree_delete(cur, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	/* lookup and update size of the previous extent */
	error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
				   left.br_blockcount, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	left.br_blockcount = blockcount;

	return xfs_bmbt_update(cur, left.br_startoff, left.br_startblock,
			       left.br_blockcount, left.br_state);
}

/*
 * Shift a single extent.
 */
STATIC int
xfs_bmse_shift_one(
	struct xfs_inode		*ip,
	int				whichfork,
	xfs_fileoff_t			offset_shift_fsb,
	int				*current_ext,
	struct xfs_bmbt_rec_host	*gotp,
	struct xfs_btree_cur		*cur,
	int				*logflags,
	enum shift_direction		direction)
{
	struct xfs_ifork		*ifp;
	struct xfs_mount		*mp;
	xfs_fileoff_t			startoff;
	struct xfs_bmbt_rec_host	*adj_irecp;
	struct xfs_bmbt_irec		got;
	struct xfs_bmbt_irec		adj_irec;
	int				error;
	int				i;
	int				total_extents;

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);

	xfs_bmbt_get_all(gotp, &got);

	/* delalloc extents should be prevented by caller */
	XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));

	if (direction == SHIFT_LEFT) {
		startoff = got.br_startoff - offset_shift_fsb;

		/*
		 * Check for merge if we've got an extent to the left,
		 * otherwise make sure there's enough room at the start
		 * of the file for the shift.
		 */
		if (!*current_ext) {
			if (got.br_startoff < offset_shift_fsb)
				return -EINVAL;
			goto update_current_ext;
		}
		/*
		 * grab the left extent and check for a large
		 * enough hole.
		 */
		adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1);
		xfs_bmbt_get_all(adj_irecp, &adj_irec);

		if (startoff <
		    adj_irec.br_startoff + adj_irec.br_blockcount)
			return -EINVAL;

		/* check whether to merge the extent or shift it down */
		if (xfs_bmse_can_merge(&adj_irec, &got,
				       offset_shift_fsb)) {
			return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
					      *current_ext, gotp, adj_irecp,
					      cur, logflags);
		}
	} else {
		startoff = got.br_startoff + offset_shift_fsb;
		/* nothing to move if this is the last extent */
		if (*current_ext >= (total_extents - 1))
			goto update_current_ext;
		/*
		 * If this is not the last extent in the file, make sure there
		 * is enough room between current extent and next extent for
		 * accommodating the shift.
		 */
		adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1);
		xfs_bmbt_get_all(adj_irecp, &adj_irec);
		if (startoff + got.br_blockcount > adj_irec.br_startoff)
			return -EINVAL;
		/*
		 * Unlike a left shift (which involves a hole punch), a right
		 * shift does not modify extent neighbors in any way.  We
		 * should never find mergeable extents in this scenario.
		 * Check anyway and warn if we encounter two extents that
		 * could be one.
		 */
		if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb))
			WARN_ON_ONCE(1);
	}
	/*
	 * Increment the extent index for the next iteration, update the start
	 * offset of the in-core extent and update the btree if applicable.
	 */
update_current_ext:
	if (direction == SHIFT_LEFT)
		(*current_ext)++;
	else
		(*current_ext)--;
	xfs_bmbt_set_startoff(gotp, startoff);
	*logflags |= XFS_ILOG_CORE;
	if (!cur) {
		*logflags |= XFS_ILOG_DEXT;
		return 0;
	}

	error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
				   got.br_blockcount, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	got.br_startoff = startoff;
	return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
			       got.br_blockcount, got.br_state);
}

/*
 * Shift extent records to the left/right to cover/create a hole.
 *
 * The maximum number of extents to be shifted in a single operation is
 * @num_exts. @stop_fsb specifies the file offset at which to stop shifting,
 * and the file offset where we've left off is returned in @next_fsb.
 * @offset_shift_fsb is the length by which each extent is shifted. If there
 * is no hole to shift the extents into, this is considered an invalid
 * operation and we abort immediately.
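 *
 * Callers are expected to invoke this function repeatedly, picking up at
 * @next_fsb each time, until @done is set. (This note reflects how the
 * collapse/insert range paths drive it; it is not enforced here.)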
 */
int
xfs_bmap_shift_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		*next_fsb,
	xfs_fileoff_t		offset_shift_fsb,
	int			*done,
	xfs_fileoff_t		stop_fsb,
	xfs_fsblock_t		*firstblock,
	struct xfs_bmap_free	*flist,
	enum shift_direction	direction,
	int			num_exts)
{
	struct xfs_btree_cur		*cur = NULL;
	struct xfs_bmbt_rec_host	*gotp;
	struct xfs_bmbt_irec		got;
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_ifork		*ifp;
	xfs_extnum_t			nexts = 0;
	xfs_extnum_t			current_ext;
	xfs_extnum_t			total_extents;
	xfs_extnum_t			stop_extent;
	int				error = 0;
	int				whichfork = XFS_DATA_FORK;
	int				logflags = 0;

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmap_shift_extents",
				 XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
	ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT);

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		/* Read in all the extents */
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	if (ifp->if_flags & XFS_IFBROOT) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstblock;
		cur->bc_private.b.flist = flist;
		cur->bc_private.b.flags = 0;
	}

	/*
	 * There may be delalloc extents in the data fork before the range we
	 * are collapsing out, so we cannot use the count of real extents here.
	 * Instead we have to calculate it from the incore fork.
	 */
	total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
	if (total_extents == 0) {
		*done = 1;
		goto del_cursor;
	}

	/*
	 * For the first right shift, we need to initialize next_fsb.
	 */
	if (*next_fsb == NULLFSBLOCK) {
		gotp = xfs_iext_get_ext(ifp, total_extents - 1);
		xfs_bmbt_get_all(gotp, &got);
		*next_fsb = got.br_startoff;
		if (stop_fsb > *next_fsb) {
			*done = 1;
			goto del_cursor;
		}
	}

	/* Lookup the extent index at which we have to stop */
	if (direction == SHIFT_RIGHT) {
		gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent);
		/* Make stop_extent exclusive of shift range */
		stop_extent--;
	} else
		stop_extent = total_extents;

	/*
	 * Look up the extent index for the fsb where we start shifting. We can
	 * henceforth iterate with current_ext as extent list changes are locked
	 * out via ilock.
	 *
	 * gotp can be null in 2 cases: 1) if there are no extents or 2)
	 * *next_fsb lies in a hole beyond which there are no extents. Either
	 * way, we are done.
	 */
	gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext);
	if (!gotp) {
		*done = 1;
		goto del_cursor;
	}

	/* some sanity checking before we finally start shifting extents */
	if ((direction == SHIFT_LEFT && current_ext >= stop_extent) ||
	     (direction == SHIFT_RIGHT && current_ext <= stop_extent)) {
		error = -EIO;
		goto del_cursor;
	}

	while (nexts++ < num_exts) {
		error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
					   &current_ext, gotp, cur, &logflags,
					   direction);
		if (error)
			goto del_cursor;
		/*
		 * If there was an extent merge during the shift, the extent
		 * count can change. Update the total and grab the next record.
		 */
		if (direction == SHIFT_LEFT) {
			total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
			stop_extent = total_extents;
		}

		if (current_ext == stop_extent) {
			*done = 1;
			*next_fsb = NULLFSBLOCK;
			break;
		}
		gotp = xfs_iext_get_ext(ifp, current_ext);
	}

	if (!*done) {
		xfs_bmbt_get_all(gotp, &got);
		*next_fsb = got.br_startoff;
	}

del_cursor:
	if (cur)
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);

	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);

	return error;
}

/*
 * Split an extent at the block offset split_fsb, so that split_fsb becomes
 * the first block of one of the two resulting extents.  current_ext is the
 * target extent to be split.  If split_fsb lies in a hole or at the first
 * block of an extent, there is nothing to do and we just return 0.
 */
STATIC int
xfs_bmap_split_extent_at(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		split_fsb,
	xfs_fsblock_t		*firstfsb,
	struct xfs_bmap_free	*free_list)
{
	int				whichfork = XFS_DATA_FORK;
	struct xfs_btree_cur		*cur = NULL;
	struct xfs_bmbt_rec_host	*gotp;
	struct xfs_bmbt_irec		got;
	struct xfs_bmbt_irec		new; /* split extent */
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_ifork		*ifp;
	xfs_fsblock_t			gotblkcnt; /* new block count for got */
	xfs_extnum_t			current_ext;
	int				error = 0;
	int				logflags = 0;
	int				i = 0;

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
				 XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		/* Read in all the extents */
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	/*
	 * gotp can be null in 2 cases: 1) if there are no extents
	 * or 2) split_fsb lies in a hole beyond which there are
	 * no extents. Either way, we are done.
	 */
	gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext);
	if (!gotp)
		return 0;

	xfs_bmbt_get_all(gotp, &got);

	/*
	 * Check whether split_fsb lies in a hole or at the start boundary
	 * offset of the extent.
	 */
	if (got.br_startoff >= split_fsb)
		return 0;

	gotblkcnt = split_fsb - got.br_startoff;
	new.br_startoff = split_fsb;
	new.br_startblock = got.br_startblock + gotblkcnt;
	new.br_blockcount = got.br_blockcount - gotblkcnt;
	new.br_state = got.br_state;
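	/*
	 * E.g. (hypothetical numbers): splitting got = [0, 10) at disk
	 * block 100 at split_fsb = 4 gives gotblkcnt = 4, shrinking got
	 * to [0, 4) and creating new = [4, 10) at disk block 104.
	 */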

	if (ifp->if_flags & XFS_IFBROOT) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstfsb;
		cur->bc_private.b.flist = free_list;
		cur->bc_private.b.flags = 0;
		error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
				got.br_startblock,
				got.br_blockcount,
				&i);
		if (error)
			goto del_cursor;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
	}

	xfs_bmbt_set_blockcount(gotp, gotblkcnt);
	got.br_blockcount = gotblkcnt;

	logflags = XFS_ILOG_CORE;
	if (cur) {
		error = xfs_bmbt_update(cur, got.br_startoff,
				got.br_startblock,
				got.br_blockcount,
				got.br_state);
		if (error)
			goto del_cursor;
	} else
		logflags |= XFS_ILOG_DEXT;

	/* Add new extent */
	current_ext++;
	xfs_iext_insert(ip, current_ext, 1, &new, 0);
	XFS_IFORK_NEXT_SET(ip, whichfork,
			   XFS_IFORK_NEXTENTS(ip, whichfork) + 1);

	if (cur) {
		error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
				new.br_startblock, new.br_blockcount,
				&i);
		if (error)
			goto del_cursor;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
		cur->bc_rec.b.br_state = new.br_state;

		error = xfs_btree_insert(cur, &i);
		if (error)
			goto del_cursor;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
	}

	/*
	 * Convert to a btree if necessary.
	 */
	if (xfs_bmap_needs_btree(ip, whichfork)) {
		int tmp_logflags; /* partial log flag return val */

		ASSERT(cur == NULL);
		error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list,
				&cur, 0, &tmp_logflags, whichfork);
		logflags |= tmp_logflags;
	}

del_cursor:
	if (cur) {
		cur->bc_private.b.allocated = 0;
		xfs_btree_del_cursor(cur,
				error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}

	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}

int
xfs_bmap_split_extent(
	struct xfs_inode        *ip,
	xfs_fileoff_t           split_fsb)
{
	struct xfs_mount        *mp = ip->i_mount;
	struct xfs_trans        *tp;
	struct xfs_bmap_free    free_list;
	xfs_fsblock_t           firstfsb;
	int                     committed;
	int                     error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
			XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	xfs_bmap_init(&free_list, &firstfsb);
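	/*
	 * xfs_bmap_init() primes free_list and firstfsb so that any blocks
	 * affected while splitting are deferred on free_list and later
	 * processed by xfs_bmap_finish() below (which may roll the
	 * transaction).
	 */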

	error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
			&firstfsb, &free_list);
	if (error)
		goto out;

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error)
		goto out;

	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);

out:
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
	return error;
}
