1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
4 * All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it would be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write the Free Software Foundation,
17 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19#include "xfs.h"
20#include "xfs_fs.h"
21#include "xfs_format.h"
22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h"
24#include "xfs_mount.h"
25#include "xfs_da_format.h"
26#include "xfs_da_btree.h"
27#include "xfs_inode.h"
28#include "xfs_dir2.h"
29#include "xfs_dir2_priv.h"
30#include "xfs_error.h"
31#include "xfs_trans.h"
32#include "xfs_buf_item.h"
33#include "xfs_cksum.h"
34
35/*
36 * Check the consistency of the data block.
37 * The input can also be a block-format directory.
38 * Return 0 is the buffer is good, otherwise an error.
39 */
40int
41__xfs_dir3_data_check(
42	struct xfs_inode	*dp,		/* incore inode pointer */
43	struct xfs_buf		*bp)		/* data block's buffer */
44{
45	xfs_dir2_dataptr_t	addr;		/* addr for leaf lookup */
46	xfs_dir2_data_free_t	*bf;		/* bestfree table */
47	xfs_dir2_block_tail_t	*btp=NULL;	/* block tail */
48	int			count;		/* count of entries found */
49	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
50	xfs_dir2_data_entry_t	*dep;		/* data entry */
51	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */
52	xfs_dir2_data_unused_t	*dup;		/* unused entry */
53	char			*endp;		/* end of useful data */
54	int			freeseen;	/* mask of bestfrees seen */
55	xfs_dahash_t		hash;		/* hash of current name */
56	int			i;		/* leaf index */
57	int			lastfree;	/* last entry was unused */
58	xfs_dir2_leaf_entry_t	*lep=NULL;	/* block leaf entries */
59	xfs_mount_t		*mp;		/* filesystem mount point */
60	char			*p;		/* current data position */
61	int			stale;		/* count of stale leaves */
62	struct xfs_name		name;
63	const struct xfs_dir_ops *ops;
64	struct xfs_da_geometry	*geo;
65
66	mp = bp->b_target->bt_mount;
67	geo = mp->m_dir_geo;
68
69	/*
70	 * We can be passed a null dp here from a verifier, so we need to go the
71	 * hard way to get them.
72	 */
73	ops = xfs_dir_get_ops(mp, dp);
74
75	hdr = bp->b_addr;
76	p = (char *)ops->data_entry_p(hdr);
77
78	switch (hdr->magic) {
79	case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
80	case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
81		btp = xfs_dir2_block_tail_p(geo, hdr);
82		lep = xfs_dir2_block_leaf_p(btp);
83		endp = (char *)lep;
84
85		/*
86		 * The number of leaf entries is limited by the size of the
87		 * block and the amount of space used by the data entries.
88		 * We don't know how much space is used by the data entries yet,
89		 * so just ensure that the count falls somewhere inside the
90		 * block right now.
91		 */
92		XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) <
93			((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry));
94		break;
95	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
96	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
97		endp = (char *)hdr + geo->blksize;
98		break;
99	default:
100		XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp);
101		return -EFSCORRUPTED;
102	}
103
104	/*
105	 * Account for zero bestfree entries.
106	 */
107	bf = ops->data_bestfree_p(hdr);
108	count = lastfree = freeseen = 0;
109	if (!bf[0].length) {
110		XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset);
111		freeseen |= 1 << 0;
112	}
113	if (!bf[1].length) {
114		XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset);
115		freeseen |= 1 << 1;
116	}
117	if (!bf[2].length) {
118		XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset);
119		freeseen |= 1 << 2;
120	}
121
122	XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >=
123						be16_to_cpu(bf[1].length));
124	XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >=
125						be16_to_cpu(bf[2].length));
126	/*
127	 * Loop over the data/unused entries.
128	 */
129	while (p < endp) {
130		dup = (xfs_dir2_data_unused_t *)p;
131		/*
132		 * If it's unused, look for the space in the bestfree table.
133		 * If we find it, account for that, else make sure it
134		 * doesn't need to be there.
135		 */
136		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
137			XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0);
138			XFS_WANT_CORRUPTED_RETURN(mp,
139				be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
140					       (char *)dup - (char *)hdr);
141			dfp = xfs_dir2_data_freefind(hdr, bf, dup);
142			if (dfp) {
143				i = (int)(dfp - bf);
144				XFS_WANT_CORRUPTED_RETURN(mp,
145					(freeseen & (1 << i)) == 0);
146				freeseen |= 1 << i;
147			} else {
148				XFS_WANT_CORRUPTED_RETURN(mp,
149					be16_to_cpu(dup->length) <=
150						be16_to_cpu(bf[2].length));
151			}
152			p += be16_to_cpu(dup->length);
153			lastfree = 1;
154			continue;
155		}
156		/*
157		 * It's a real entry.  Validate the fields.
158		 * If this is a block directory then make sure it's
159		 * in the leaf section of the block.
160		 * The linear search is crude but this is DEBUG code.
161		 */
162		dep = (xfs_dir2_data_entry_t *)p;
163		XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0);
164		XFS_WANT_CORRUPTED_RETURN(mp,
165			!xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
166		XFS_WANT_CORRUPTED_RETURN(mp,
167			be16_to_cpu(*ops->data_entry_tag_p(dep)) ==
168					       (char *)dep - (char *)hdr);
169		XFS_WANT_CORRUPTED_RETURN(mp,
170				ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX);
171		count++;
172		lastfree = 0;
173		if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
174		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
175			addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
176						(xfs_dir2_data_aoff_t)
177						((char *)dep - (char *)hdr));
178			name.name = dep->name;
179			name.len = dep->namelen;
180			hash = mp->m_dirnameops->hashname(&name);
181			for (i = 0; i < be32_to_cpu(btp->count); i++) {
182				if (be32_to_cpu(lep[i].address) == addr &&
183				    be32_to_cpu(lep[i].hashval) == hash)
184					break;
185			}
186			XFS_WANT_CORRUPTED_RETURN(mp,
187						  i < be32_to_cpu(btp->count));
188		}
189		p += ops->data_entsize(dep->namelen);
190	}
191	/*
192	 * Need to have seen all the entries and all the bestfree slots.
193	 */
194	XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7);
195	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
196	    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
197		for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
198			if (lep[i].address ==
199			    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
200				stale++;
201			if (i > 0)
202				XFS_WANT_CORRUPTED_RETURN(mp,
203					be32_to_cpu(lep[i].hashval) >=
204						be32_to_cpu(lep[i - 1].hashval));
205		}
206		XFS_WANT_CORRUPTED_RETURN(mp, count ==
207			be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
208		XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale));
209	}
210	return 0;
211}
212
213static bool
214xfs_dir3_data_verify(
215	struct xfs_buf		*bp)
216{
217	struct xfs_mount	*mp = bp->b_target->bt_mount;
218	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;
219
220	if (xfs_sb_version_hascrc(&mp->m_sb)) {
221		if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
222			return false;
223		if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
224			return false;
225		if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
226			return false;
227	} else {
228		if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
229			return false;
230	}
231	if (__xfs_dir3_data_check(NULL, bp))
232		return false;
233	return true;
234}
235
236/*
237 * Readahead of the first block of the directory when it is opened is completely
238 * oblivious to the format of the directory. Hence we can either get a block
239 * format buffer or a data format buffer on readahead.
240 */
241static void
242xfs_dir3_data_reada_verify(
243	struct xfs_buf		*bp)
244{
245	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
246
247	switch (hdr->magic) {
248	case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
249	case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
250		bp->b_ops = &xfs_dir3_block_buf_ops;
251		bp->b_ops->verify_read(bp);
252		return;
253	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
254	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
255		bp->b_ops = &xfs_dir3_data_buf_ops;
256		bp->b_ops->verify_read(bp);
257		return;
258	default:
259		xfs_buf_ioerror(bp, -EFSCORRUPTED);
260		xfs_verifier_error(bp);
261		break;
262	}
263}
264
265static void
266xfs_dir3_data_read_verify(
267	struct xfs_buf	*bp)
268{
269	struct xfs_mount	*mp = bp->b_target->bt_mount;
270
271	if (xfs_sb_version_hascrc(&mp->m_sb) &&
272	     !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
273		 xfs_buf_ioerror(bp, -EFSBADCRC);
274	else if (!xfs_dir3_data_verify(bp))
275		xfs_buf_ioerror(bp, -EFSCORRUPTED);
276
277	if (bp->b_error)
278		xfs_verifier_error(bp);
279}
280
281static void
282xfs_dir3_data_write_verify(
283	struct xfs_buf	*bp)
284{
285	struct xfs_mount	*mp = bp->b_target->bt_mount;
286	struct xfs_buf_log_item	*bip = bp->b_fspriv;
287	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;
288
289	if (!xfs_dir3_data_verify(bp)) {
290		xfs_buf_ioerror(bp, -EFSCORRUPTED);
291		xfs_verifier_error(bp);
292		return;
293	}
294
295	if (!xfs_sb_version_hascrc(&mp->m_sb))
296		return;
297
298	if (bip)
299		hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
300
301	xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
302}
303
304const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
305	.verify_read = xfs_dir3_data_read_verify,
306	.verify_write = xfs_dir3_data_write_verify,
307};
308
309static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
310	.verify_read = xfs_dir3_data_reada_verify,
311	.verify_write = xfs_dir3_data_write_verify,
312};
313
314
315int
316xfs_dir3_data_read(
317	struct xfs_trans	*tp,
318	struct xfs_inode	*dp,
319	xfs_dablk_t		bno,
320	xfs_daddr_t		mapped_bno,
321	struct xfs_buf		**bpp)
322{
323	int			err;
324
325	err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
326				XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
327	if (!err && tp)
328		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
329	return err;
330}
331
332int
333xfs_dir3_data_readahead(
334	struct xfs_inode	*dp,
335	xfs_dablk_t		bno,
336	xfs_daddr_t		mapped_bno)
337{
338	return xfs_da_reada_buf(dp, bno, mapped_bno,
339				XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops);
340}
341
342/*
343 * Given a data block and an unused entry from that block,
344 * return the bestfree entry if any that corresponds to it.
345 */
346xfs_dir2_data_free_t *
347xfs_dir2_data_freefind(
348	struct xfs_dir2_data_hdr *hdr,		/* data block header */
349	struct xfs_dir2_data_free *bf,		/* bestfree table pointer */
350	struct xfs_dir2_data_unused *dup)	/* unused space */
351{
352	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */
353	xfs_dir2_data_aoff_t	off;		/* offset value needed */
354#ifdef DEBUG
355	int			matched;	/* matched the value */
356	int			seenzero;	/* saw a 0 bestfree entry */
357#endif
358
359	off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
360
361#ifdef DEBUG
362	/*
363	 * Validate some consistency in the bestfree table.
364	 * Check order, non-overlapping entries, and if we find the
365	 * one we're looking for it has to be exact.
366	 */
367	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
368	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
369	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
370	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
371	for (dfp = &bf[0], seenzero = matched = 0;
372	     dfp < &bf[XFS_DIR2_DATA_FD_COUNT];
373	     dfp++) {
374		if (!dfp->offset) {
375			ASSERT(!dfp->length);
376			seenzero = 1;
377			continue;
378		}
379		ASSERT(seenzero == 0);
380		if (be16_to_cpu(dfp->offset) == off) {
381			matched = 1;
382			ASSERT(dfp->length == dup->length);
383		} else if (off < be16_to_cpu(dfp->offset))
384			ASSERT(off + be16_to_cpu(dup->length) <= be16_to_cpu(dfp->offset));
385		else
386			ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
387		ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
388		if (dfp > &bf[0])
389			ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
390	}
391#endif
392	/*
393	 * If this is smaller than the smallest bestfree entry,
394	 * it can't be there since they're sorted.
395	 */
396	if (be16_to_cpu(dup->length) <
397	    be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
398		return NULL;
399	/*
400	 * Look at the three bestfree entries for our guy.
401	 */
402	for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
403		if (!dfp->offset)
404			return NULL;
405		if (be16_to_cpu(dfp->offset) == off)
406			return dfp;
407	}
408	/*
409	 * Didn't find it.  This only happens if there are duplicate lengths.
410	 */
411	return NULL;
412}
413
414/*
415 * Insert an unused-space entry into the bestfree table.
416 */
417xfs_dir2_data_free_t *				/* entry inserted */
418xfs_dir2_data_freeinsert(
419	struct xfs_dir2_data_hdr *hdr,		/* data block pointer */
420	struct xfs_dir2_data_free *dfp,		/* bestfree table pointer */
421	struct xfs_dir2_data_unused *dup,	/* unused space */
422	int			*loghead)	/* log the data header (out) */
423{
424	xfs_dir2_data_free_t	new;		/* new bestfree entry */
425
426	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
427	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
428	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
429	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
430
431	new.length = dup->length;
432	new.offset = cpu_to_be16((char *)dup - (char *)hdr);
433
434	/*
435	 * Insert at position 0, 1, or 2; or not at all.
436	 */
437	if (be16_to_cpu(new.length) > be16_to_cpu(dfp[0].length)) {
438		dfp[2] = dfp[1];
439		dfp[1] = dfp[0];
440		dfp[0] = new;
441		*loghead = 1;
442		return &dfp[0];
443	}
444	if (be16_to_cpu(new.length) > be16_to_cpu(dfp[1].length)) {
445		dfp[2] = dfp[1];
446		dfp[1] = new;
447		*loghead = 1;
448		return &dfp[1];
449	}
450	if (be16_to_cpu(new.length) > be16_to_cpu(dfp[2].length)) {
451		dfp[2] = new;
452		*loghead = 1;
453		return &dfp[2];
454	}
455	return NULL;
456}
457
458/*
459 * Remove a bestfree entry from the table.
460 */
461STATIC void
462xfs_dir2_data_freeremove(
463	struct xfs_dir2_data_hdr *hdr,		/* data block header */
464	struct xfs_dir2_data_free *bf,		/* bestfree table pointer */
465	struct xfs_dir2_data_free *dfp,		/* bestfree entry pointer */
466	int			*loghead)	/* out: log data header */
467{
468
469	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
470	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
471	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
472	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
473
474	/*
475	 * It's the first entry, slide the next 2 up.
476	 */
477	if (dfp == &bf[0]) {
478		bf[0] = bf[1];
479		bf[1] = bf[2];
480	}
481	/*
482	 * It's the second entry, slide the 3rd entry up.
483	 */
484	else if (dfp == &bf[1])
485		bf[1] = bf[2];
486	/*
487	 * Must be the last entry.
488	 */
489	else
490		ASSERT(dfp == &bf[2]);
491	/*
492	 * Clear the 3rd entry, must be zero now.
493	 */
494	bf[2].length = 0;
495	bf[2].offset = 0;
496	*loghead = 1;
497}
498
499/*
500 * Given a data block, reconstruct its bestfree map.
501 */
502void
503xfs_dir2_data_freescan(
504	struct xfs_inode	*dp,
505	struct xfs_dir2_data_hdr *hdr,
506	int			*loghead)
507{
508	xfs_dir2_block_tail_t	*btp;		/* block tail */
509	xfs_dir2_data_entry_t	*dep;		/* active data entry */
510	xfs_dir2_data_unused_t	*dup;		/* unused data entry */
511	struct xfs_dir2_data_free *bf;
512	char			*endp;		/* end of block's data */
513	char			*p;		/* current entry pointer */
514	struct xfs_da_geometry	*geo = dp->i_mount->m_dir_geo;
515
516	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
517	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
518	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
519	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
520
521	/*
522	 * Start by clearing the table.
523	 */
524	bf = dp->d_ops->data_bestfree_p(hdr);
525	memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT);
526	*loghead = 1;
527	/*
528	 * Set up pointers.
529	 */
530	p = (char *)dp->d_ops->data_entry_p(hdr);
531	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
532	    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
533		btp = xfs_dir2_block_tail_p(geo, hdr);
534		endp = (char *)xfs_dir2_block_leaf_p(btp);
535	} else
536		endp = (char *)hdr + geo->blksize;
537	/*
538	 * Loop over the block's entries.
539	 */
540	while (p < endp) {
541		dup = (xfs_dir2_data_unused_t *)p;
542		/*
543		 * If it's a free entry, insert it.
544		 */
545		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
546			ASSERT((char *)dup - (char *)hdr ==
547			       be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
548			xfs_dir2_data_freeinsert(hdr, bf, dup, loghead);
549			p += be16_to_cpu(dup->length);
550		}
551		/*
552		 * For active entries, check their tags and skip them.
553		 */
554		else {
555			dep = (xfs_dir2_data_entry_t *)p;
556			ASSERT((char *)dep - (char *)hdr ==
557			       be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep)));
558			p += dp->d_ops->data_entsize(dep->namelen);
559		}
560	}
561}
562
563/*
564 * Initialize a data block at the given block number in the directory.
565 * Give back the buffer for the created block.
566 */
567int						/* error */
568xfs_dir3_data_init(
569	xfs_da_args_t		*args,		/* directory operation args */
570	xfs_dir2_db_t		blkno,		/* logical dir block number */
571	struct xfs_buf		**bpp)		/* output block buffer */
572{
573	struct xfs_buf		*bp;		/* block buffer */
574	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
575	xfs_inode_t		*dp;		/* incore directory inode */
576	xfs_dir2_data_unused_t	*dup;		/* unused entry pointer */
577	struct xfs_dir2_data_free *bf;
578	int			error;		/* error return value */
579	int			i;		/* bestfree index */
580	xfs_mount_t		*mp;		/* filesystem mount point */
581	xfs_trans_t		*tp;		/* transaction pointer */
582	int                     t;              /* temp */
583
584	dp = args->dp;
585	mp = dp->i_mount;
586	tp = args->trans;
587	/*
588	 * Get the buffer set up for the block.
589	 */
590	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno),
591			       -1, &bp, XFS_DATA_FORK);
592	if (error)
593		return error;
594	bp->b_ops = &xfs_dir3_data_buf_ops;
595	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF);
596
597	/*
598	 * Initialize the header.
599	 */
600	hdr = bp->b_addr;
601	if (xfs_sb_version_hascrc(&mp->m_sb)) {
602		struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
603
604		memset(hdr3, 0, sizeof(*hdr3));
605		hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
606		hdr3->blkno = cpu_to_be64(bp->b_bn);
607		hdr3->owner = cpu_to_be64(dp->i_ino);
608		uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
609
610	} else
611		hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
612
613	bf = dp->d_ops->data_bestfree_p(hdr);
614	bf[0].offset = cpu_to_be16(dp->d_ops->data_entry_offset);
615	for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
616		bf[i].length = 0;
617		bf[i].offset = 0;
618	}
619
620	/*
621	 * Set up an unused entry for the block's body.
622	 */
623	dup = dp->d_ops->data_unused_p(hdr);
624	dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
625
626	t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset;
627	bf[0].length = cpu_to_be16(t);
628	dup->length = cpu_to_be16(t);
629	*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
630	/*
631	 * Log it and return it.
632	 */
633	xfs_dir2_data_log_header(args, bp);
634	xfs_dir2_data_log_unused(args, bp, dup);
635	*bpp = bp;
636	return 0;
637}
638
639/*
640 * Log an active data entry from the block.
641 */
642void
643xfs_dir2_data_log_entry(
644	struct xfs_da_args	*args,
645	struct xfs_buf		*bp,
646	xfs_dir2_data_entry_t	*dep)		/* data entry pointer */
647{
648	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
649
650	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
651	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
652	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
653	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
654
655	xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr),
656		(uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) -
657		       (char *)hdr - 1));
658}
659
660/*
661 * Log a data block header.
662 */
663void
664xfs_dir2_data_log_header(
665	struct xfs_da_args	*args,
666	struct xfs_buf		*bp)
667{
668#ifdef DEBUG
669	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
670
671	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
672	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
673	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
674	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
675#endif
676
677	xfs_trans_log_buf(args->trans, bp, 0,
678			  args->dp->d_ops->data_entry_offset - 1);
679}
680
681/*
682 * Log a data unused entry.
683 */
684void
685xfs_dir2_data_log_unused(
686	struct xfs_da_args	*args,
687	struct xfs_buf		*bp,
688	xfs_dir2_data_unused_t	*dup)		/* data unused pointer */
689{
690	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
691
692	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
693	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
694	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
695	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
696
697	/*
698	 * Log the first part of the unused entry.
699	 */
700	xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr),
701		(uint)((char *)&dup->length + sizeof(dup->length) -
702		       1 - (char *)hdr));
703	/*
704	 * Log the end (tag) of the unused entry.
705	 */
706	xfs_trans_log_buf(args->trans, bp,
707		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
708		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
709		       sizeof(xfs_dir2_data_off_t) - 1));
710}
711
/*
 * Make a byte range in the data block unused.
 * Its current contents are unimportant.
 *
 * The range [offset, offset + len) is turned into free space and merged with
 * a free neighbour on either side when there is one.  The resulting unused
 * entry is logged here; the bestfree table is updated in place when that can
 * be done safely, otherwise the caller is asked to rescan.
 *
 * @args:	operation args; supplies the transaction, geometry and inode
 * @bp:		buffer holding the data (or block-format) block
 * @offset:	starting byte offset of the range within the block
 * @len:	length of the range in bytes
 * @needlogp:	out: set to 1 by the bestfree helpers when the table changed
 * @needscanp:	out: non-zero when the bestfree table must be rebuilt; must
 *		be 0 on entry (asserted)
 */
void
xfs_dir2_data_make_free(
	struct xfs_da_args	*args,
	struct xfs_buf		*bp,
	xfs_dir2_data_aoff_t	offset,		/* starting byte offset */
	xfs_dir2_data_aoff_t	len,		/* length in bytes */
	int			*needlogp,	/* out: log header */
	int			*needscanp)	/* out: regen bestfree */
{
	xfs_dir2_data_hdr_t	*hdr;		/* data block pointer */
	xfs_dir2_data_free_t	*dfp;		/* bestfree pointer */
	char			*endptr;	/* end of data area */
	int			needscan;	/* need to regen bestfree */
	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */
	xfs_dir2_data_unused_t	*postdup;	/* unused entry after us */
	xfs_dir2_data_unused_t	*prevdup;	/* unused entry before us */
	struct xfs_dir2_data_free *bf;

	hdr = bp->b_addr;

	/*
	 * Figure out where the end of the data area is.
	 * Data format runs to the end of the block, block format stops at
	 * the start of the leaf array.
	 */
	if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
	    hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC))
		endptr = (char *)hdr + args->geo->blksize;
	else {
		xfs_dir2_block_tail_t	*btp;	/* block tail */

		ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
		btp = xfs_dir2_block_tail_p(args->geo, hdr);
		endptr = (char *)xfs_dir2_block_leaf_p(btp);
	}
	/*
	 * If this isn't the start of the block, then back up to
	 * the previous entry and see if it's free.
	 * The two bytes just before our range are the previous entry's
	 * tag, i.e. the offset at which that entry starts.
	 */
	if (offset > args->dp->d_ops->data_entry_offset) {
		__be16			*tagp;	/* tag just before us */

		tagp = (__be16 *)((char *)hdr + offset) - 1;
		prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
		if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
			prevdup = NULL;
	} else
		prevdup = NULL;
	/*
	 * If this isn't the end of the block, see if the entry after
	 * us is free.
	 */
	if ((char *)hdr + offset + len < endptr) {
		postdup =
			(xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
		if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
			postdup = NULL;
	} else
		postdup = NULL;
	ASSERT(*needscanp == 0);
	needscan = 0;
	/*
	 * Previous and following entries are both free,
	 * merge everything into a single free entry.
	 */
	bf = args->dp->d_ops->data_bestfree_p(hdr);
	if (prevdup && postdup) {
		xfs_dir2_data_free_t	*dfp2;	/* another bestfree pointer */

		/*
		 * See if prevdup and/or postdup are in bestfree table.
		 * This has to happen before prevdup's length is changed
		 * below.
		 */
		dfp = xfs_dir2_data_freefind(hdr, bf, prevdup);
		dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup);
		/*
		 * We need a rescan unless there are exactly 2 free entries
		 * namely our two.  Then we know what's happening, otherwise
		 * since the third bestfree is there, there might be more
		 * entries.
		 */
		needscan = (bf[2].length != 0);
		/*
		 * Fix up the new big freespace.
		 * prevdup grows to cover our range and postdup; its tag
		 * moves to the new end and is rewritten.
		 */
		be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
		*xfs_dir2_data_unused_tag_p(prevdup) =
			cpu_to_be16((char *)prevdup - (char *)hdr);
		xfs_dir2_data_log_unused(args, bp, prevdup);
		if (!needscan) {
			/*
			 * Has to be the case that entries 0 and 1 are
			 * dfp and dfp2 (don't know which is which), and
			 * entry 2 is empty.
			 * Remove entry 1 first then entry 0.
			 */
			ASSERT(dfp && dfp2);
			if (dfp == &bf[1]) {
				dfp = &bf[0];
				ASSERT(dfp2 == dfp);
				dfp2 = &bf[1];
			}
			xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp);
			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
			/*
			 * Now insert the new entry.
			 */
			dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup,
						       needlogp);
			ASSERT(dfp == &bf[0]);
			ASSERT(dfp->length == prevdup->length);
			ASSERT(!dfp[1].length);
			ASSERT(!dfp[2].length);
		}
	}
	/*
	 * The entry before us is free, merge with it.
	 */
	else if (prevdup) {
		/* Look it up before its length changes. */
		dfp = xfs_dir2_data_freefind(hdr, bf, prevdup);
		be16_add_cpu(&prevdup->length, len);
		*xfs_dir2_data_unused_tag_p(prevdup) =
			cpu_to_be16((char *)prevdup - (char *)hdr);
		xfs_dir2_data_log_unused(args, bp, prevdup);
		/*
		 * If the previous entry was in the table, the new entry
		 * is longer, so it will be in the table too.  Remove
		 * the old one and add the new one.
		 */
		if (dfp) {
			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
			xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp);
		}
		/*
		 * Otherwise we need a scan if the new entry is big enough.
		 */
		else {
			needscan = be16_to_cpu(prevdup->length) >
				   be16_to_cpu(bf[2].length);
		}
	}
	/*
	 * The following entry is free, merge with it.
	 */
	else if (postdup) {
		dfp = xfs_dir2_data_freefind(hdr, bf, postdup);
		/* The merged entry starts at our offset, not at postdup. */
		newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
		newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
		*xfs_dir2_data_unused_tag_p(newdup) =
			cpu_to_be16((char *)newdup - (char *)hdr);
		xfs_dir2_data_log_unused(args, bp, newdup);
		/*
		 * If the following entry was in the table, the new entry
		 * is longer, so it will be in the table too.  Remove
		 * the old one and add the new one.
		 */
		if (dfp) {
			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
			xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
		}
		/*
		 * Otherwise we need a scan if the new entry is big enough.
		 */
		else {
			needscan = be16_to_cpu(newdup->length) >
				   be16_to_cpu(bf[2].length);
		}
	}
	/*
	 * Neither neighbor is free.  Make a new entry.
	 */
	else {
		newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
		newdup->length = cpu_to_be16(len);
		*xfs_dir2_data_unused_tag_p(newdup) =
			cpu_to_be16((char *)newdup - (char *)hdr);
		xfs_dir2_data_log_unused(args, bp, newdup);
		/* Insertion is a no-op if the entry is too short to rank. */
		xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
	}
	*needscanp = needscan;
}
897
/*
 * Take a byte range out of an existing unused space and make it un-free.
 *
 * The range [offset, offset + len) must lie inside the unused entry @dup
 * (asserted).  Depending on whether the range touches the front and/or the
 * back of @dup, the entry disappears, is trimmed, or is split in two.  Any
 * remaining unused entries are rewritten and logged, and the bestfree table
 * is fixed up in place when possible.
 *
 * @args:	operation args; supplies the transaction and inode
 * @bp:		buffer holding the data (or block-format) block
 * @dup:	unused entry the range is carved out of
 * @offset:	starting byte offset of the range within the block
 * @len:	length of the range in bytes
 * @needlogp:	out: set to 1 by the bestfree helpers when the table changed
 * @needscanp:	out: non-zero when the bestfree table must be rebuilt; must
 *		be 0 on entry (asserted)
 */
void
xfs_dir2_data_use_free(
	struct xfs_da_args	*args,
	struct xfs_buf		*bp,
	xfs_dir2_data_unused_t	*dup,		/* unused entry */
	xfs_dir2_data_aoff_t	offset,		/* starting offset to use */
	xfs_dir2_data_aoff_t	len,		/* length to use */
	int			*needlogp,	/* out: need to log header */
	int			*needscanp)	/* out: need regen bestfree */
{
	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
	xfs_dir2_data_free_t	*dfp;		/* bestfree pointer */
	int			matchback;	/* matches end of freespace */
	int			matchfront;	/* matches start of freespace */
	int			needscan;	/* need to regen bestfree */
	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */
	xfs_dir2_data_unused_t	*newdup2;	/* another new unused entry */
	int			oldlen;		/* old unused entry's length */
	struct xfs_dir2_data_free *bf;

	hdr = bp->b_addr;
	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
	/* dup must be a well-formed unused entry containing the range. */
	ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
	ASSERT(offset >= (char *)dup - (char *)hdr);
	ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
	ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
	/*
	 * Look up the entry in the bestfree table.
	 * Remember the original length, dup may be rewritten below.
	 */
	oldlen = be16_to_cpu(dup->length);
	bf = args->dp->d_ops->data_bestfree_p(hdr);
	dfp = xfs_dir2_data_freefind(hdr, bf, dup);
	ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length));
	/*
	 * Check for alignment with front and back of the entry.
	 */
	matchfront = (char *)dup - (char *)hdr == offset;
	matchback = (char *)dup + oldlen - (char *)hdr == offset + len;
	ASSERT(*needscanp == 0);
	needscan = 0;
	/*
	 * If we matched it exactly we just need to get rid of it from
	 * the bestfree table.
	 * With a full table there may be another free entry that should
	 * now move up into it, so ask for a rescan instead.
	 */
	if (matchfront && matchback) {
		if (dfp) {
			needscan = (bf[2].offset != 0);
			if (!needscan)
				xfs_dir2_data_freeremove(hdr, bf, dfp,
							 needlogp);
		}
	}
	/*
	 * We match the first part of the entry.
	 * Make a new entry with the remaining freespace.
	 */
	else if (matchfront) {
		newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
		newdup->length = cpu_to_be16(oldlen - len);
		*xfs_dir2_data_unused_tag_p(newdup) =
			cpu_to_be16((char *)newdup - (char *)hdr);
		xfs_dir2_data_log_unused(args, bp, newdup);
		/*
		 * If it was in the table, remove it and add the new one.
		 */
		if (dfp) {
			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
			dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
						       needlogp);
			ASSERT(dfp != NULL);
			ASSERT(dfp->length == newdup->length);
			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
			/*
			 * If we got inserted at the last slot,
			 * that means we don't know if there was a better
			 * choice for the last slot, or not.  Rescan.
			 */
			needscan = dfp == &bf[2];
		}
	}
	/*
	 * We match the last part of the entry.
	 * Trim the allocated space off the tail of the entry.
	 */
	else if (matchback) {
		/* The entry keeps its start; only length and tag change. */
		newdup = dup;
		newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
		*xfs_dir2_data_unused_tag_p(newdup) =
			cpu_to_be16((char *)newdup - (char *)hdr);
		xfs_dir2_data_log_unused(args, bp, newdup);
		/*
		 * If it was in the table, remove it and add the new one.
		 */
		if (dfp) {
			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
			dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
						       needlogp);
			ASSERT(dfp != NULL);
			ASSERT(dfp->length == newdup->length);
			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
			/*
			 * If we got inserted at the last slot,
			 * that means we don't know if there was a better
			 * choice for the last slot, or not.  Rescan.
			 */
			needscan = dfp == &bf[2];
		}
	}
	/*
	 * Poking out the middle of an entry.
	 * Make two new entries.
	 */
	else {
		/* Front piece: dup shortened to end where the range starts. */
		newdup = dup;
		newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
		*xfs_dir2_data_unused_tag_p(newdup) =
			cpu_to_be16((char *)newdup - (char *)hdr);
		xfs_dir2_data_log_unused(args, bp, newdup);
		/* Back piece: whatever is left behind the range. */
		newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
		newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
		newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
		*xfs_dir2_data_unused_tag_p(newdup2) =
			cpu_to_be16((char *)newdup2 - (char *)hdr);
		xfs_dir2_data_log_unused(args, bp, newdup2);
		/*
		 * If the old entry was in the table, we need to scan
		 * if the 3rd entry was valid, since these entries
		 * are smaller than the old one.
		 * If we don't need to scan that means there were 1 or 2
		 * entries in the table, and removing the old and adding
		 * the 2 new will work.
		 */
		if (dfp) {
			needscan = (bf[2].length != 0);
			if (!needscan) {
				xfs_dir2_data_freeremove(hdr, bf, dfp,
							 needlogp);
				xfs_dir2_data_freeinsert(hdr, bf, newdup,
							 needlogp);
				xfs_dir2_data_freeinsert(hdr, bf, newdup2,
							 needlogp);
			}
		}
	}
	*needscanp = needscan;
}
1051