1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_shared.h"
21#include "xfs_format.h"
22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h"
24#include "xfs_bit.h"
25#include "xfs_mount.h"
26#include "xfs_da_format.h"
27#include "xfs_da_btree.h"
28#include "xfs_attr_sf.h"
29#include "xfs_inode.h"
30#include "xfs_alloc.h"
31#include "xfs_trans.h"
32#include "xfs_inode_item.h"
33#include "xfs_bmap.h"
34#include "xfs_bmap_util.h"
35#include "xfs_bmap_btree.h"
36#include "xfs_attr.h"
37#include "xfs_attr_leaf.h"
38#include "xfs_attr_remote.h"
39#include "xfs_error.h"
40#include "xfs_quota.h"
41#include "xfs_trans_space.h"
42#include "xfs_trace.h"
43
44/*
45 * xfs_attr.c
46 *
47 * Provide the external interfaces to manage attribute lists.
48 */
49
50/*========================================================================
51 * Function prototypes for the kernel.
52 *========================================================================*/
53
54/*
55 * Internal routines when attribute list fits inside the inode.
56 */
57STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
58
59/*
60 * Internal routines when attribute list is one block.
61 */
62STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
63STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
64STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
65
66/*
67 * Internal routines when attribute list is more than one block.
68 */
69STATIC int xfs_attr_node_get(xfs_da_args_t *args);
70STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
71STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
72STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
73STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
74
75
76STATIC int
77xfs_attr_args_init(
78	struct xfs_da_args	*args,
79	struct xfs_inode	*dp,
80	const unsigned char	*name,
81	int			flags)
82{
83
84	if (!name)
85		return -EINVAL;
86
87	memset(args, 0, sizeof(*args));
88	args->geo = dp->i_mount->m_attr_geo;
89	args->whichfork = XFS_ATTR_FORK;
90	args->dp = dp;
91	args->flags = flags;
92	args->name = name;
93	args->namelen = strlen((const char *)name);
94	if (args->namelen >= MAXNAMELEN)
95		return -EFAULT;		/* match IRIX behaviour */
96
97	args->hashval = xfs_da_hashname(args->name, args->namelen);
98	return 0;
99}
100
101int
102xfs_inode_hasattr(
103	struct xfs_inode	*ip)
104{
105	if (!XFS_IFORK_Q(ip) ||
106	    (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
107	     ip->i_d.di_anextents == 0))
108		return 0;
109	return 1;
110}
111
112/*========================================================================
113 * Overall external interface routines.
114 *========================================================================*/
115
116int
117xfs_attr_get(
118	struct xfs_inode	*ip,
119	const unsigned char	*name,
120	unsigned char		*value,
121	int			*valuelenp,
122	int			flags)
123{
124	struct xfs_da_args	args;
125	uint			lock_mode;
126	int			error;
127
128	XFS_STATS_INC(ip->i_mount, xs_attr_get);
129
130	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
131		return -EIO;
132
133	if (!xfs_inode_hasattr(ip))
134		return -ENOATTR;
135
136	error = xfs_attr_args_init(&args, ip, name, flags);
137	if (error)
138		return error;
139
140	args.value = value;
141	args.valuelen = *valuelenp;
142	/* Entirely possible to look up a name which doesn't exist */
143	args.op_flags = XFS_DA_OP_OKNOENT;
144
145	lock_mode = xfs_ilock_attr_map_shared(ip);
146	if (!xfs_inode_hasattr(ip))
147		error = -ENOATTR;
148	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
149		error = xfs_attr_shortform_getvalue(&args);
150	else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
151		error = xfs_attr_leaf_get(&args);
152	else
153		error = xfs_attr_node_get(&args);
154	xfs_iunlock(ip, lock_mode);
155
156	*valuelenp = args.valuelen;
157	return error == -EEXIST ? 0 : error;
158}
159
160/*
161 * Calculate how many blocks we need for the new attribute,
162 */
163STATIC int
164xfs_attr_calc_size(
165	struct xfs_da_args	*args,
166	int			*local)
167{
168	struct xfs_mount	*mp = args->dp->i_mount;
169	int			size;
170	int			nblks;
171
172	/*
173	 * Determine space new attribute will use, and if it would be
174	 * "local" or "remote" (note: local != inline).
175	 */
176	size = xfs_attr_leaf_newentsize(args, local);
177	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
178	if (*local) {
179		if (size > (args->geo->blksize / 2)) {
180			/* Double split possible */
181			nblks *= 2;
182		}
183	} else {
184		/*
185		 * Out of line attribute, cannot double split, but
186		 * make room for the attribute value itself.
187		 */
188		uint	dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
189		nblks += dblocks;
190		nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
191	}
192
193	return nblks;
194}
195
196int
197xfs_attr_set(
198	struct xfs_inode	*dp,
199	const unsigned char	*name,
200	unsigned char		*value,
201	int			valuelen,
202	int			flags)
203{
204	struct xfs_mount	*mp = dp->i_mount;
205	struct xfs_da_args	args;
206	struct xfs_bmap_free	flist;
207	struct xfs_trans_res	tres;
208	xfs_fsblock_t		firstblock;
209	int			rsvd = (flags & ATTR_ROOT) != 0;
210	int			error, err2, committed, local;
211
212	XFS_STATS_INC(mp, xs_attr_set);
213
214	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
215		return -EIO;
216
217	error = xfs_attr_args_init(&args, dp, name, flags);
218	if (error)
219		return error;
220
221	args.value = value;
222	args.valuelen = valuelen;
223	args.firstblock = &firstblock;
224	args.flist = &flist;
225	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
226	args.total = xfs_attr_calc_size(&args, &local);
227
228	error = xfs_qm_dqattach(dp, 0);
229	if (error)
230		return error;
231
232	/*
233	 * If the inode doesn't have an attribute fork, add one.
234	 * (inode must not be locked when we call this routine)
235	 */
236	if (XFS_IFORK_Q(dp) == 0) {
237		int sf_size = sizeof(xfs_attr_sf_hdr_t) +
238			XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen);
239
240		error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
241		if (error)
242			return error;
243	}
244
245	/*
246	 * Start our first transaction of the day.
247	 *
248	 * All future transactions during this code must be "chained" off
249	 * this one via the trans_dup() call.  All transactions will contain
250	 * the inode, and the inode will always be marked with trans_ihold().
251	 * Since the inode will be locked in all transactions, we must log
252	 * the inode in every transaction to let it float upward through
253	 * the log.
254	 */
255	args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET);
256
257	/*
258	 * Root fork attributes can use reserved data blocks for this
259	 * operation if necessary
260	 */
261
262	if (rsvd)
263		args.trans->t_flags |= XFS_TRANS_RESERVE;
264
265	tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
266			 M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
267	tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
268	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
269	error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
270	if (error) {
271		xfs_trans_cancel(args.trans);
272		return error;
273	}
274	xfs_ilock(dp, XFS_ILOCK_EXCL);
275
276	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
277				rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
278				       XFS_QMOPT_RES_REGBLKS);
279	if (error) {
280		xfs_iunlock(dp, XFS_ILOCK_EXCL);
281		xfs_trans_cancel(args.trans);
282		return error;
283	}
284
285	xfs_trans_ijoin(args.trans, dp, 0);
286
287	/*
288	 * If the attribute list is non-existent or a shortform list,
289	 * upgrade it to a single-leaf-block attribute list.
290	 */
291	if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
292	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
293	     dp->i_d.di_anextents == 0)) {
294
295		/*
296		 * Build initial attribute list (if required).
297		 */
298		if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
299			xfs_attr_shortform_create(&args);
300
301		/*
302		 * Try to add the attr to the attribute list in
303		 * the inode.
304		 */
305		error = xfs_attr_shortform_addname(&args);
306		if (error != -ENOSPC) {
307			/*
308			 * Commit the shortform mods, and we're done.
309			 * NOTE: this is also the error path (EEXIST, etc).
310			 */
311			ASSERT(args.trans != NULL);
312
313			/*
314			 * If this is a synchronous mount, make sure that
315			 * the transaction goes to disk before returning
316			 * to the user.
317			 */
318			if (mp->m_flags & XFS_MOUNT_WSYNC)
319				xfs_trans_set_sync(args.trans);
320
321			if (!error && (flags & ATTR_KERNOTIME) == 0) {
322				xfs_trans_ichgtime(args.trans, dp,
323							XFS_ICHGTIME_CHG);
324			}
325			err2 = xfs_trans_commit(args.trans);
326			xfs_iunlock(dp, XFS_ILOCK_EXCL);
327
328			return error ? error : err2;
329		}
330
331		/*
332		 * It won't fit in the shortform, transform to a leaf block.
333		 * GROT: another possible req'mt for a double-split btree op.
334		 */
335		xfs_bmap_init(args.flist, args.firstblock);
336		error = xfs_attr_shortform_to_leaf(&args);
337		if (!error) {
338			error = xfs_bmap_finish(&args.trans, args.flist,
339						&committed);
340		}
341		if (error) {
342			ASSERT(committed);
343			args.trans = NULL;
344			xfs_bmap_cancel(&flist);
345			goto out;
346		}
347
348		/*
349		 * bmap_finish() may have committed the last trans and started
350		 * a new one.  We need the inode to be in all transactions.
351		 */
352		if (committed)
353			xfs_trans_ijoin(args.trans, dp, 0);
354
355		/*
356		 * Commit the leaf transformation.  We'll need another (linked)
357		 * transaction to add the new attribute to the leaf.
358		 */
359
360		error = xfs_trans_roll(&args.trans, dp);
361		if (error)
362			goto out;
363
364	}
365
366	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
367		error = xfs_attr_leaf_addname(&args);
368	else
369		error = xfs_attr_node_addname(&args);
370	if (error)
371		goto out;
372
373	/*
374	 * If this is a synchronous mount, make sure that the
375	 * transaction goes to disk before returning to the user.
376	 */
377	if (mp->m_flags & XFS_MOUNT_WSYNC)
378		xfs_trans_set_sync(args.trans);
379
380	if ((flags & ATTR_KERNOTIME) == 0)
381		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
382
383	/*
384	 * Commit the last in the sequence of transactions.
385	 */
386	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
387	error = xfs_trans_commit(args.trans);
388	xfs_iunlock(dp, XFS_ILOCK_EXCL);
389
390	return error;
391
392out:
393	if (args.trans)
394		xfs_trans_cancel(args.trans);
395	xfs_iunlock(dp, XFS_ILOCK_EXCL);
396	return error;
397}
398
399/*
400 * Generic handler routine to remove a name from an attribute list.
401 * Transitions attribute list from Btree to shortform as necessary.
402 */
403int
404xfs_attr_remove(
405	struct xfs_inode	*dp,
406	const unsigned char	*name,
407	int			flags)
408{
409	struct xfs_mount	*mp = dp->i_mount;
410	struct xfs_da_args	args;
411	struct xfs_bmap_free	flist;
412	xfs_fsblock_t		firstblock;
413	int			error;
414
415	XFS_STATS_INC(mp, xs_attr_remove);
416
417	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
418		return -EIO;
419
420	if (!xfs_inode_hasattr(dp))
421		return -ENOATTR;
422
423	error = xfs_attr_args_init(&args, dp, name, flags);
424	if (error)
425		return error;
426
427	args.firstblock = &firstblock;
428	args.flist = &flist;
429
430	/*
431	 * we have no control over the attribute names that userspace passes us
432	 * to remove, so we have to allow the name lookup prior to attribute
433	 * removal to fail.
434	 */
435	args.op_flags = XFS_DA_OP_OKNOENT;
436
437	error = xfs_qm_dqattach(dp, 0);
438	if (error)
439		return error;
440
441	/*
442	 * Start our first transaction of the day.
443	 *
444	 * All future transactions during this code must be "chained" off
445	 * this one via the trans_dup() call.  All transactions will contain
446	 * the inode, and the inode will always be marked with trans_ihold().
447	 * Since the inode will be locked in all transactions, we must log
448	 * the inode in every transaction to let it float upward through
449	 * the log.
450	 */
451	args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
452
453	/*
454	 * Root fork attributes can use reserved data blocks for this
455	 * operation if necessary
456	 */
457
458	if (flags & ATTR_ROOT)
459		args.trans->t_flags |= XFS_TRANS_RESERVE;
460
461	error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm,
462				  XFS_ATTRRM_SPACE_RES(mp), 0);
463	if (error) {
464		xfs_trans_cancel(args.trans);
465		return error;
466	}
467
468	xfs_ilock(dp, XFS_ILOCK_EXCL);
469	/*
470	 * No need to make quota reservations here. We expect to release some
471	 * blocks not allocate in the common case.
472	 */
473	xfs_trans_ijoin(args.trans, dp, 0);
474
475	if (!xfs_inode_hasattr(dp)) {
476		error = -ENOATTR;
477	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
478		ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
479		error = xfs_attr_shortform_remove(&args);
480	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
481		error = xfs_attr_leaf_removename(&args);
482	} else {
483		error = xfs_attr_node_removename(&args);
484	}
485
486	if (error)
487		goto out;
488
489	/*
490	 * If this is a synchronous mount, make sure that the
491	 * transaction goes to disk before returning to the user.
492	 */
493	if (mp->m_flags & XFS_MOUNT_WSYNC)
494		xfs_trans_set_sync(args.trans);
495
496	if ((flags & ATTR_KERNOTIME) == 0)
497		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
498
499	/*
500	 * Commit the last in the sequence of transactions.
501	 */
502	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
503	error = xfs_trans_commit(args.trans);
504	xfs_iunlock(dp, XFS_ILOCK_EXCL);
505
506	return error;
507
508out:
509	if (args.trans)
510		xfs_trans_cancel(args.trans);
511	xfs_iunlock(dp, XFS_ILOCK_EXCL);
512	return error;
513}
514
515/*========================================================================
516 * External routines when attribute list is inside the inode
517 *========================================================================*/
518
519/*
520 * Add a name to the shortform attribute list structure
521 * This is the external routine.
522 */
523STATIC int
524xfs_attr_shortform_addname(xfs_da_args_t *args)
525{
526	int newsize, forkoff, retval;
527
528	trace_xfs_attr_sf_addname(args);
529
530	retval = xfs_attr_shortform_lookup(args);
531	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
532		return retval;
533	} else if (retval == -EEXIST) {
534		if (args->flags & ATTR_CREATE)
535			return retval;
536		retval = xfs_attr_shortform_remove(args);
537		ASSERT(retval == 0);
538	}
539
540	if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
541	    args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
542		return -ENOSPC;
543
544	newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
545	newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
546
547	forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
548	if (!forkoff)
549		return -ENOSPC;
550
551	xfs_attr_shortform_add(args, forkoff);
552	return 0;
553}
554
555
556/*========================================================================
557 * External routines when attribute list is one block
558 *========================================================================*/
559
560/*
561 * Add a name to the leaf attribute list structure
562 *
563 * This leaf block cannot have a "remote" value, we only call this routine
564 * if bmap_one_block() says there is only one block (ie: no remote blks).
565 */
566STATIC int
567xfs_attr_leaf_addname(xfs_da_args_t *args)
568{
569	xfs_inode_t *dp;
570	struct xfs_buf *bp;
571	int retval, error, committed, forkoff;
572
573	trace_xfs_attr_leaf_addname(args);
574
575	/*
576	 * Read the (only) block in the attribute list in.
577	 */
578	dp = args->dp;
579	args->blkno = 0;
580	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
581	if (error)
582		return error;
583
584	/*
585	 * Look up the given attribute in the leaf block.  Figure out if
586	 * the given flags produce an error or call for an atomic rename.
587	 */
588	retval = xfs_attr3_leaf_lookup_int(bp, args);
589	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
590		xfs_trans_brelse(args->trans, bp);
591		return retval;
592	} else if (retval == -EEXIST) {
593		if (args->flags & ATTR_CREATE) {	/* pure create op */
594			xfs_trans_brelse(args->trans, bp);
595			return retval;
596		}
597
598		trace_xfs_attr_leaf_replace(args);
599
600		/* save the attribute state for later removal*/
601		args->op_flags |= XFS_DA_OP_RENAME;	/* an atomic rename */
602		args->blkno2 = args->blkno;		/* set 2nd entry info*/
603		args->index2 = args->index;
604		args->rmtblkno2 = args->rmtblkno;
605		args->rmtblkcnt2 = args->rmtblkcnt;
606		args->rmtvaluelen2 = args->rmtvaluelen;
607
608		/*
609		 * clear the remote attr state now that it is saved so that the
610		 * values reflect the state of the attribute we are about to
611		 * add, not the attribute we just found and will remove later.
612		 */
613		args->rmtblkno = 0;
614		args->rmtblkcnt = 0;
615		args->rmtvaluelen = 0;
616	}
617
618	/*
619	 * Add the attribute to the leaf block, transitioning to a Btree
620	 * if required.
621	 */
622	retval = xfs_attr3_leaf_add(bp, args);
623	if (retval == -ENOSPC) {
624		/*
625		 * Promote the attribute list to the Btree format, then
626		 * Commit that transaction so that the node_addname() call
627		 * can manage its own transactions.
628		 */
629		xfs_bmap_init(args->flist, args->firstblock);
630		error = xfs_attr3_leaf_to_node(args);
631		if (!error) {
632			error = xfs_bmap_finish(&args->trans, args->flist,
633						&committed);
634		}
635		if (error) {
636			ASSERT(committed);
637			args->trans = NULL;
638			xfs_bmap_cancel(args->flist);
639			return error;
640		}
641
642		/*
643		 * bmap_finish() may have committed the last trans and started
644		 * a new one.  We need the inode to be in all transactions.
645		 */
646		if (committed)
647			xfs_trans_ijoin(args->trans, dp, 0);
648
649		/*
650		 * Commit the current trans (including the inode) and start
651		 * a new one.
652		 */
653		error = xfs_trans_roll(&args->trans, dp);
654		if (error)
655			return error;
656
657		/*
658		 * Fob the whole rest of the problem off on the Btree code.
659		 */
660		error = xfs_attr_node_addname(args);
661		return error;
662	}
663
664	/*
665	 * Commit the transaction that added the attr name so that
666	 * later routines can manage their own transactions.
667	 */
668	error = xfs_trans_roll(&args->trans, dp);
669	if (error)
670		return error;
671
672	/*
673	 * If there was an out-of-line value, allocate the blocks we
674	 * identified for its storage and copy the value.  This is done
675	 * after we create the attribute so that we don't overflow the
676	 * maximum size of a transaction and/or hit a deadlock.
677	 */
678	if (args->rmtblkno > 0) {
679		error = xfs_attr_rmtval_set(args);
680		if (error)
681			return error;
682	}
683
684	/*
685	 * If this is an atomic rename operation, we must "flip" the
686	 * incomplete flags on the "new" and "old" attribute/value pairs
687	 * so that one disappears and one appears atomically.  Then we
688	 * must remove the "old" attribute/value pair.
689	 */
690	if (args->op_flags & XFS_DA_OP_RENAME) {
691		/*
692		 * In a separate transaction, set the incomplete flag on the
693		 * "old" attr and clear the incomplete flag on the "new" attr.
694		 */
695		error = xfs_attr3_leaf_flipflags(args);
696		if (error)
697			return error;
698
699		/*
700		 * Dismantle the "old" attribute/value pair by removing
701		 * a "remote" value (if it exists).
702		 */
703		args->index = args->index2;
704		args->blkno = args->blkno2;
705		args->rmtblkno = args->rmtblkno2;
706		args->rmtblkcnt = args->rmtblkcnt2;
707		args->rmtvaluelen = args->rmtvaluelen2;
708		if (args->rmtblkno) {
709			error = xfs_attr_rmtval_remove(args);
710			if (error)
711				return error;
712		}
713
714		/*
715		 * Read in the block containing the "old" attr, then
716		 * remove the "old" attr from that block (neat, huh!)
717		 */
718		error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
719					   -1, &bp);
720		if (error)
721			return error;
722
723		xfs_attr3_leaf_remove(bp, args);
724
725		/*
726		 * If the result is small enough, shrink it all into the inode.
727		 */
728		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
729			xfs_bmap_init(args->flist, args->firstblock);
730			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
731			/* bp is gone due to xfs_da_shrink_inode */
732			if (!error) {
733				error = xfs_bmap_finish(&args->trans,
734							args->flist,
735							&committed);
736			}
737			if (error) {
738				ASSERT(committed);
739				args->trans = NULL;
740				xfs_bmap_cancel(args->flist);
741				return error;
742			}
743
744			/*
745			 * bmap_finish() may have committed the last trans
746			 * and started a new one.  We need the inode to be
747			 * in all transactions.
748			 */
749			if (committed)
750				xfs_trans_ijoin(args->trans, dp, 0);
751		}
752
753		/*
754		 * Commit the remove and start the next trans in series.
755		 */
756		error = xfs_trans_roll(&args->trans, dp);
757
758	} else if (args->rmtblkno > 0) {
759		/*
760		 * Added a "remote" value, just clear the incomplete flag.
761		 */
762		error = xfs_attr3_leaf_clearflag(args);
763	}
764	return error;
765}
766
767/*
768 * Remove a name from the leaf attribute list structure
769 *
770 * This leaf block cannot have a "remote" value, we only call this routine
771 * if bmap_one_block() says there is only one block (ie: no remote blks).
772 */
773STATIC int
774xfs_attr_leaf_removename(xfs_da_args_t *args)
775{
776	xfs_inode_t *dp;
777	struct xfs_buf *bp;
778	int error, committed, forkoff;
779
780	trace_xfs_attr_leaf_removename(args);
781
782	/*
783	 * Remove the attribute.
784	 */
785	dp = args->dp;
786	args->blkno = 0;
787	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
788	if (error)
789		return error;
790
791	error = xfs_attr3_leaf_lookup_int(bp, args);
792	if (error == -ENOATTR) {
793		xfs_trans_brelse(args->trans, bp);
794		return error;
795	}
796
797	xfs_attr3_leaf_remove(bp, args);
798
799	/*
800	 * If the result is small enough, shrink it all into the inode.
801	 */
802	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
803		xfs_bmap_init(args->flist, args->firstblock);
804		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
805		/* bp is gone due to xfs_da_shrink_inode */
806		if (!error) {
807			error = xfs_bmap_finish(&args->trans, args->flist,
808						&committed);
809		}
810		if (error) {
811			ASSERT(committed);
812			args->trans = NULL;
813			xfs_bmap_cancel(args->flist);
814			return error;
815		}
816
817		/*
818		 * bmap_finish() may have committed the last trans and started
819		 * a new one.  We need the inode to be in all transactions.
820		 */
821		if (committed)
822			xfs_trans_ijoin(args->trans, dp, 0);
823	}
824	return 0;
825}
826
827/*
828 * Look up a name in a leaf attribute list structure.
829 *
830 * This leaf block cannot have a "remote" value, we only call this routine
831 * if bmap_one_block() says there is only one block (ie: no remote blks).
832 */
833STATIC int
834xfs_attr_leaf_get(xfs_da_args_t *args)
835{
836	struct xfs_buf *bp;
837	int error;
838
839	trace_xfs_attr_leaf_get(args);
840
841	args->blkno = 0;
842	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
843	if (error)
844		return error;
845
846	error = xfs_attr3_leaf_lookup_int(bp, args);
847	if (error != -EEXIST)  {
848		xfs_trans_brelse(args->trans, bp);
849		return error;
850	}
851	error = xfs_attr3_leaf_getvalue(bp, args);
852	xfs_trans_brelse(args->trans, bp);
853	if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
854		error = xfs_attr_rmtval_get(args);
855	}
856	return error;
857}
858
859/*========================================================================
860 * External routines when attribute list size > geo->blksize
861 *========================================================================*/
862
863/*
864 * Add a name to a Btree-format attribute list.
865 *
866 * This will involve walking down the Btree, and may involve splitting
867 * leaf nodes and even splitting intermediate nodes up to and including
868 * the root node (a special case of an intermediate node).
869 *
870 * "Remote" attribute values confuse the issue and atomic rename operations
871 * add a whole extra layer of confusion on top of that.
872 */
873STATIC int
874xfs_attr_node_addname(xfs_da_args_t *args)
875{
876	xfs_da_state_t *state;
877	xfs_da_state_blk_t *blk;
878	xfs_inode_t *dp;
879	xfs_mount_t *mp;
880	int committed, retval, error;
881
882	trace_xfs_attr_node_addname(args);
883
884	/*
885	 * Fill in bucket of arguments/results/context to carry around.
886	 */
887	dp = args->dp;
888	mp = dp->i_mount;
889restart:
890	state = xfs_da_state_alloc();
891	state->args = args;
892	state->mp = mp;
893
894	/*
895	 * Search to see if name already exists, and get back a pointer
896	 * to where it should go.
897	 */
898	error = xfs_da3_node_lookup_int(state, &retval);
899	if (error)
900		goto out;
901	blk = &state->path.blk[ state->path.active-1 ];
902	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
903	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
904		goto out;
905	} else if (retval == -EEXIST) {
906		if (args->flags & ATTR_CREATE)
907			goto out;
908
909		trace_xfs_attr_node_replace(args);
910
911		/* save the attribute state for later removal*/
912		args->op_flags |= XFS_DA_OP_RENAME;	/* atomic rename op */
913		args->blkno2 = args->blkno;		/* set 2nd entry info*/
914		args->index2 = args->index;
915		args->rmtblkno2 = args->rmtblkno;
916		args->rmtblkcnt2 = args->rmtblkcnt;
917		args->rmtvaluelen2 = args->rmtvaluelen;
918
919		/*
920		 * clear the remote attr state now that it is saved so that the
921		 * values reflect the state of the attribute we are about to
922		 * add, not the attribute we just found and will remove later.
923		 */
924		args->rmtblkno = 0;
925		args->rmtblkcnt = 0;
926		args->rmtvaluelen = 0;
927	}
928
929	retval = xfs_attr3_leaf_add(blk->bp, state->args);
930	if (retval == -ENOSPC) {
931		if (state->path.active == 1) {
932			/*
933			 * Its really a single leaf node, but it had
934			 * out-of-line values so it looked like it *might*
935			 * have been a b-tree.
936			 */
937			xfs_da_state_free(state);
938			state = NULL;
939			xfs_bmap_init(args->flist, args->firstblock);
940			error = xfs_attr3_leaf_to_node(args);
941			if (!error) {
942				error = xfs_bmap_finish(&args->trans,
943							args->flist,
944							&committed);
945			}
946			if (error) {
947				ASSERT(committed);
948				args->trans = NULL;
949				xfs_bmap_cancel(args->flist);
950				goto out;
951			}
952
953			/*
954			 * bmap_finish() may have committed the last trans
955			 * and started a new one.  We need the inode to be
956			 * in all transactions.
957			 */
958			if (committed)
959				xfs_trans_ijoin(args->trans, dp, 0);
960
961			/*
962			 * Commit the node conversion and start the next
963			 * trans in the chain.
964			 */
965			error = xfs_trans_roll(&args->trans, dp);
966			if (error)
967				goto out;
968
969			goto restart;
970		}
971
972		/*
973		 * Split as many Btree elements as required.
974		 * This code tracks the new and old attr's location
975		 * in the index/blkno/rmtblkno/rmtblkcnt fields and
976		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
977		 */
978		xfs_bmap_init(args->flist, args->firstblock);
979		error = xfs_da3_split(state);
980		if (!error) {
981			error = xfs_bmap_finish(&args->trans, args->flist,
982						&committed);
983		}
984		if (error) {
985			ASSERT(committed);
986			args->trans = NULL;
987			xfs_bmap_cancel(args->flist);
988			goto out;
989		}
990
991		/*
992		 * bmap_finish() may have committed the last trans and started
993		 * a new one.  We need the inode to be in all transactions.
994		 */
995		if (committed)
996			xfs_trans_ijoin(args->trans, dp, 0);
997	} else {
998		/*
999		 * Addition succeeded, update Btree hashvals.
1000		 */
1001		xfs_da3_fixhashpath(state, &state->path);
1002	}
1003
1004	/*
1005	 * Kill the state structure, we're done with it and need to
1006	 * allow the buffers to come back later.
1007	 */
1008	xfs_da_state_free(state);
1009	state = NULL;
1010
1011	/*
1012	 * Commit the leaf addition or btree split and start the next
1013	 * trans in the chain.
1014	 */
1015	error = xfs_trans_roll(&args->trans, dp);
1016	if (error)
1017		goto out;
1018
1019	/*
1020	 * If there was an out-of-line value, allocate the blocks we
1021	 * identified for its storage and copy the value.  This is done
1022	 * after we create the attribute so that we don't overflow the
1023	 * maximum size of a transaction and/or hit a deadlock.
1024	 */
1025	if (args->rmtblkno > 0) {
1026		error = xfs_attr_rmtval_set(args);
1027		if (error)
1028			return error;
1029	}
1030
1031	/*
1032	 * If this is an atomic rename operation, we must "flip" the
1033	 * incomplete flags on the "new" and "old" attribute/value pairs
1034	 * so that one disappears and one appears atomically.  Then we
1035	 * must remove the "old" attribute/value pair.
1036	 */
1037	if (args->op_flags & XFS_DA_OP_RENAME) {
1038		/*
1039		 * In a separate transaction, set the incomplete flag on the
1040		 * "old" attr and clear the incomplete flag on the "new" attr.
1041		 */
1042		error = xfs_attr3_leaf_flipflags(args);
1043		if (error)
1044			goto out;
1045
1046		/*
1047		 * Dismantle the "old" attribute/value pair by removing
1048		 * a "remote" value (if it exists).
1049		 */
1050		args->index = args->index2;
1051		args->blkno = args->blkno2;
1052		args->rmtblkno = args->rmtblkno2;
1053		args->rmtblkcnt = args->rmtblkcnt2;
1054		args->rmtvaluelen = args->rmtvaluelen2;
1055		if (args->rmtblkno) {
1056			error = xfs_attr_rmtval_remove(args);
1057			if (error)
1058				return error;
1059		}
1060
1061		/*
1062		 * Re-find the "old" attribute entry after any split ops.
1063		 * The INCOMPLETE flag means that we will find the "old"
1064		 * attr, not the "new" one.
1065		 */
1066		args->flags |= XFS_ATTR_INCOMPLETE;
1067		state = xfs_da_state_alloc();
1068		state->args = args;
1069		state->mp = mp;
1070		state->inleaf = 0;
1071		error = xfs_da3_node_lookup_int(state, &retval);
1072		if (error)
1073			goto out;
1074
1075		/*
1076		 * Remove the name and update the hashvals in the tree.
1077		 */
1078		blk = &state->path.blk[ state->path.active-1 ];
1079		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1080		error = xfs_attr3_leaf_remove(blk->bp, args);
1081		xfs_da3_fixhashpath(state, &state->path);
1082
1083		/*
1084		 * Check to see if the tree needs to be collapsed.
1085		 */
1086		if (retval && (state->path.active > 1)) {
1087			xfs_bmap_init(args->flist, args->firstblock);
1088			error = xfs_da3_join(state);
1089			if (!error) {
1090				error = xfs_bmap_finish(&args->trans,
1091							args->flist,
1092							&committed);
1093			}
1094			if (error) {
1095				ASSERT(committed);
1096				args->trans = NULL;
1097				xfs_bmap_cancel(args->flist);
1098				goto out;
1099			}
1100
1101			/*
1102			 * bmap_finish() may have committed the last trans
1103			 * and started a new one.  We need the inode to be
1104			 * in all transactions.
1105			 */
1106			if (committed)
1107				xfs_trans_ijoin(args->trans, dp, 0);
1108		}
1109
1110		/*
1111		 * Commit and start the next trans in the chain.
1112		 */
1113		error = xfs_trans_roll(&args->trans, dp);
1114		if (error)
1115			goto out;
1116
1117	} else if (args->rmtblkno > 0) {
1118		/*
1119		 * Added a "remote" value, just clear the incomplete flag.
1120		 */
1121		error = xfs_attr3_leaf_clearflag(args);
1122		if (error)
1123			goto out;
1124	}
1125	retval = error = 0;
1126
1127out:
1128	if (state)
1129		xfs_da_state_free(state);
1130	if (error)
1131		return error;
1132	return retval;
1133}
1134
1135/*
1136 * Remove a name from a B-tree attribute list.
1137 *
1138 * This will involve walking down the Btree, and may involve joining
1139 * leaf nodes and even joining intermediate nodes up to and including
1140 * the root node (a special case of an intermediate node).
1141 */
1142STATIC int
1143xfs_attr_node_removename(xfs_da_args_t *args)
1144{
1145	xfs_da_state_t *state;
1146	xfs_da_state_blk_t *blk;
1147	xfs_inode_t *dp;
1148	struct xfs_buf *bp;
1149	int retval, error, committed, forkoff;
1150
1151	trace_xfs_attr_node_removename(args);
1152
1153	/*
1154	 * Tie a string around our finger to remind us where we are.
1155	 */
1156	dp = args->dp;
1157	state = xfs_da_state_alloc();
1158	state->args = args;
1159	state->mp = dp->i_mount;
1160
1161	/*
1162	 * Search to see if name exists, and get back a pointer to it.
1163	 */
1164	error = xfs_da3_node_lookup_int(state, &retval);
1165	if (error || (retval != -EEXIST)) {
1166		if (error == 0)
1167			error = retval;
1168		goto out;
1169	}
1170
1171	/*
1172	 * If there is an out-of-line value, de-allocate the blocks.
1173	 * This is done before we remove the attribute so that we don't
1174	 * overflow the maximum size of a transaction and/or hit a deadlock.
1175	 */
1176	blk = &state->path.blk[ state->path.active-1 ];
1177	ASSERT(blk->bp != NULL);
1178	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1179	if (args->rmtblkno > 0) {
1180		/*
1181		 * Fill in disk block numbers in the state structure
1182		 * so that we can get the buffers back after we commit
1183		 * several transactions in the following calls.
1184		 */
1185		error = xfs_attr_fillstate(state);
1186		if (error)
1187			goto out;
1188
1189		/*
1190		 * Mark the attribute as INCOMPLETE, then bunmapi() the
1191		 * remote value.
1192		 */
1193		error = xfs_attr3_leaf_setflag(args);
1194		if (error)
1195			goto out;
1196		error = xfs_attr_rmtval_remove(args);
1197		if (error)
1198			goto out;
1199
1200		/*
1201		 * Refill the state structure with buffers, the prior calls
1202		 * released our buffers.
1203		 */
1204		error = xfs_attr_refillstate(state);
1205		if (error)
1206			goto out;
1207	}
1208
1209	/*
1210	 * Remove the name and update the hashvals in the tree.
1211	 */
1212	blk = &state->path.blk[ state->path.active-1 ];
1213	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1214	retval = xfs_attr3_leaf_remove(blk->bp, args);
1215	xfs_da3_fixhashpath(state, &state->path);
1216
1217	/*
1218	 * Check to see if the tree needs to be collapsed.
1219	 */
1220	if (retval && (state->path.active > 1)) {
1221		xfs_bmap_init(args->flist, args->firstblock);
1222		error = xfs_da3_join(state);
1223		if (!error) {
1224			error = xfs_bmap_finish(&args->trans, args->flist,
1225						&committed);
1226		}
1227		if (error) {
1228			ASSERT(committed);
1229			args->trans = NULL;
1230			xfs_bmap_cancel(args->flist);
1231			goto out;
1232		}
1233
1234		/*
1235		 * bmap_finish() may have committed the last trans and started
1236		 * a new one.  We need the inode to be in all transactions.
1237		 */
1238		if (committed)
1239			xfs_trans_ijoin(args->trans, dp, 0);
1240
1241		/*
1242		 * Commit the Btree join operation and start a new trans.
1243		 */
1244		error = xfs_trans_roll(&args->trans, dp);
1245		if (error)
1246			goto out;
1247	}
1248
1249	/*
1250	 * If the result is small enough, push it all into the inode.
1251	 */
1252	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
1253		/*
1254		 * Have to get rid of the copy of this dabuf in the state.
1255		 */
1256		ASSERT(state->path.active == 1);
1257		ASSERT(state->path.blk[0].bp);
1258		state->path.blk[0].bp = NULL;
1259
1260		error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
1261		if (error)
1262			goto out;
1263
1264		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1265			xfs_bmap_init(args->flist, args->firstblock);
1266			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1267			/* bp is gone due to xfs_da_shrink_inode */
1268			if (!error) {
1269				error = xfs_bmap_finish(&args->trans,
1270							args->flist,
1271							&committed);
1272			}
1273			if (error) {
1274				ASSERT(committed);
1275				args->trans = NULL;
1276				xfs_bmap_cancel(args->flist);
1277				goto out;
1278			}
1279
1280			/*
1281			 * bmap_finish() may have committed the last trans
1282			 * and started a new one.  We need the inode to be
1283			 * in all transactions.
1284			 */
1285			if (committed)
1286				xfs_trans_ijoin(args->trans, dp, 0);
1287		} else
1288			xfs_trans_brelse(args->trans, bp);
1289	}
1290	error = 0;
1291
1292out:
1293	xfs_da_state_free(state);
1294	return error;
1295}
1296
1297/*
1298 * Fill in the disk block numbers in the state structure for the buffers
1299 * that are attached to the state structure.
1300 * This is done so that we can quickly reattach ourselves to those buffers
1301 * after some set of transaction commits have released these buffers.
1302 */
1303STATIC int
1304xfs_attr_fillstate(xfs_da_state_t *state)
1305{
1306	xfs_da_state_path_t *path;
1307	xfs_da_state_blk_t *blk;
1308	int level;
1309
1310	trace_xfs_attr_fillstate(state->args);
1311
1312	/*
1313	 * Roll down the "path" in the state structure, storing the on-disk
1314	 * block number for those buffers in the "path".
1315	 */
1316	path = &state->path;
1317	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1318	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1319		if (blk->bp) {
1320			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1321			blk->bp = NULL;
1322		} else {
1323			blk->disk_blkno = 0;
1324		}
1325	}
1326
1327	/*
1328	 * Roll down the "altpath" in the state structure, storing the on-disk
1329	 * block number for those buffers in the "altpath".
1330	 */
1331	path = &state->altpath;
1332	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1333	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1334		if (blk->bp) {
1335			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1336			blk->bp = NULL;
1337		} else {
1338			blk->disk_blkno = 0;
1339		}
1340	}
1341
1342	return 0;
1343}
1344
1345/*
1346 * Reattach the buffers to the state structure based on the disk block
1347 * numbers stored in the state structure.
1348 * This is done after some set of transaction commits have released those
1349 * buffers from our grip.
1350 */
1351STATIC int
1352xfs_attr_refillstate(xfs_da_state_t *state)
1353{
1354	xfs_da_state_path_t *path;
1355	xfs_da_state_blk_t *blk;
1356	int level, error;
1357
1358	trace_xfs_attr_refillstate(state->args);
1359
1360	/*
1361	 * Roll down the "path" in the state structure, storing the on-disk
1362	 * block number for those buffers in the "path".
1363	 */
1364	path = &state->path;
1365	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1366	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1367		if (blk->disk_blkno) {
1368			error = xfs_da3_node_read(state->args->trans,
1369						state->args->dp,
1370						blk->blkno, blk->disk_blkno,
1371						&blk->bp, XFS_ATTR_FORK);
1372			if (error)
1373				return error;
1374		} else {
1375			blk->bp = NULL;
1376		}
1377	}
1378
1379	/*
1380	 * Roll down the "altpath" in the state structure, storing the on-disk
1381	 * block number for those buffers in the "altpath".
1382	 */
1383	path = &state->altpath;
1384	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1385	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1386		if (blk->disk_blkno) {
1387			error = xfs_da3_node_read(state->args->trans,
1388						state->args->dp,
1389						blk->blkno, blk->disk_blkno,
1390						&blk->bp, XFS_ATTR_FORK);
1391			if (error)
1392				return error;
1393		} else {
1394			blk->bp = NULL;
1395		}
1396	}
1397
1398	return 0;
1399}
1400
1401/*
1402 * Look up a filename in a node attribute list.
1403 *
1404 * This routine gets called for any attribute fork that has more than one
1405 * block, ie: both true Btree attr lists and for single-leaf-blocks with
1406 * "remote" values taking up more blocks.
1407 */
1408STATIC int
1409xfs_attr_node_get(xfs_da_args_t *args)
1410{
1411	xfs_da_state_t *state;
1412	xfs_da_state_blk_t *blk;
1413	int error, retval;
1414	int i;
1415
1416	trace_xfs_attr_node_get(args);
1417
1418	state = xfs_da_state_alloc();
1419	state->args = args;
1420	state->mp = args->dp->i_mount;
1421
1422	/*
1423	 * Search to see if name exists, and get back a pointer to it.
1424	 */
1425	error = xfs_da3_node_lookup_int(state, &retval);
1426	if (error) {
1427		retval = error;
1428	} else if (retval == -EEXIST) {
1429		blk = &state->path.blk[ state->path.active-1 ];
1430		ASSERT(blk->bp != NULL);
1431		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1432
1433		/*
1434		 * Get the value, local or "remote"
1435		 */
1436		retval = xfs_attr3_leaf_getvalue(blk->bp, args);
1437		if (!retval && (args->rmtblkno > 0)
1438		    && !(args->flags & ATTR_KERNOVAL)) {
1439			retval = xfs_attr_rmtval_get(args);
1440		}
1441	}
1442
1443	/*
1444	 * If not in a transaction, we have to release all the buffers.
1445	 */
1446	for (i = 0; i < state->path.active; i++) {
1447		xfs_trans_brelse(args->trans, state->path.blk[i].bp);
1448		state->path.blk[i].bp = NULL;
1449	}
1450
1451	xfs_da_state_free(state);
1452	return retval;
1453}
1454