1/*
2 * linux/fs/ext3/xattr.c
3 *
4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5 *
6 * Fix by Harrison Xing <harrison@mountainviewdata.com>.
7 * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>.
8 * Extended attributes for symlinks and special files added per
9 *  suggestion of Luka Renko <luka.renko@hermes.si>.
10 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
11 *  Red Hat Inc.
12 * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
13 *  and Andreas Gruenbacher <agruen@suse.de>.
14 */
15
16/*
17 * Extended attributes are stored directly in inodes (on file systems with
18 * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
19 * field contains the block number if an inode uses an additional block. All
20 * attributes must fit in the inode and one additional block. Blocks that
21 * contain the identical set of attributes may be shared among several inodes.
22 * Identical blocks are detected by keeping a cache of blocks that have
23 * recently been accessed.
24 *
25 * The attributes in inodes and on blocks have a different header; the entries
26 * are stored in the same format:
27 *
28 *   +------------------+
29 *   | header           |
30 *   | entry 1          | |
31 *   | entry 2          | | growing downwards
32 *   | entry 3          | v
33 *   | four null bytes  |
34 *   | . . .            |
35 *   | value 1          | ^
36 *   | value 3          | | growing upwards
37 *   | value 2          | |
38 *   +------------------+
39 *
40 * The header is followed by multiple entry descriptors. In disk blocks, the
41 * entry descriptors are kept sorted. In inodes, they are unsorted. The
42 * attribute values are aligned to the end of the block in no specific order.
43 *
44 * Locking strategy
45 * ----------------
46 * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem.
47 * EA blocks are only changed if they are exclusive to an inode, so
48 * holding xattr_sem also means that nothing but the EA block's reference
49 * count can change. Multiple writers to the same block are synchronized
50 * by the buffer lock.
51 */
52
53#include "ext3.h"
54#include <linux/mbcache.h>
55#include <linux/quotaops.h>
56#include "xattr.h"
57#include "acl.h"
58
/* View the start of a buffer's data as an xattr block header. */
#define BHDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
/* Reinterpret an arbitrary pointer as an xattr entry descriptor. */
#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
/* First entry descriptor of an xattr block: it directly follows the header. */
#define BFIRST(bh) ENTRY(BHDR(bh)+1)
/* The entry table is terminated by a 32-bit zero word. */
#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)

/*
 * In-inode xattr header: located EXT3_GOOD_OLD_INODE_SIZE plus the inode's
 * i_extra_isize bytes past the start of the raw on-disk inode.  The
 * raw_inode argument is parenthesized so that an expression argument is
 * cast as a whole.
 */
#define IHDR(inode, raw_inode) \
	((struct ext3_xattr_ibody_header *) \
		((void *)(raw_inode) + \
		 EXT3_GOOD_OLD_INODE_SIZE + \
		 EXT3_I(inode)->i_extra_isize))
/* First in-inode entry descriptor: it directly follows the in-inode header. */
#define IFIRST(hdr) ((struct ext3_xattr_entry *)((hdr)+1))
70
/*
 * Debug helpers.  With EXT3_XATTR_DEBUG defined they print a message
 * prefixed with the inode (ea_idebug) or the block (ea_bdebug) it concerns;
 * otherwise they expand to nothing.  The inode/bh arguments are
 * parenthesized so that any pointer expression can be passed safely.
 */
#ifdef EXT3_XATTR_DEBUG
# define ea_idebug(inode, f...) do { \
		printk(KERN_DEBUG "inode %s:%lu: ", \
			(inode)->i_sb->s_id, (inode)->i_ino); \
		printk(f); \
		printk("\n"); \
	} while (0)
# define ea_bdebug(bh, f...) do { \
		char b[BDEVNAME_SIZE]; \
		printk(KERN_DEBUG "block %s:%lu: ", \
			bdevname((bh)->b_bdev, b), \
			(unsigned long) (bh)->b_blocknr); \
		printk(f); \
		printk("\n"); \
	} while (0)
#else
# define ea_idebug(f...)
# define ea_bdebug(f...)
#endif
90
91static void ext3_xattr_cache_insert(struct buffer_head *);
92static struct buffer_head *ext3_xattr_cache_find(struct inode *,
93						 struct ext3_xattr_header *,
94						 struct mb_cache_entry **);
95static void ext3_xattr_rehash(struct ext3_xattr_header *,
96			      struct ext3_xattr_entry *);
97static int ext3_xattr_list(struct dentry *dentry, char *buffer,
98			   size_t buffer_size);
99
100static struct mb_cache *ext3_xattr_cache;
101
102static const struct xattr_handler *ext3_xattr_handler_map[] = {
103	[EXT3_XATTR_INDEX_USER]		     = &ext3_xattr_user_handler,
104#ifdef CONFIG_EXT3_FS_POSIX_ACL
105	[EXT3_XATTR_INDEX_POSIX_ACL_ACCESS]  = &posix_acl_access_xattr_handler,
106	[EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
107#endif
108	[EXT3_XATTR_INDEX_TRUSTED]	     = &ext3_xattr_trusted_handler,
109#ifdef CONFIG_EXT3_FS_SECURITY
110	[EXT3_XATTR_INDEX_SECURITY]	     = &ext3_xattr_security_handler,
111#endif
112};
113
/*
 * NULL-terminated list of the xattr handlers built into ext3.  The POSIX
 * ACL and security handlers are only present when the matching config
 * option is enabled.  The array has external linkage.
 * NOTE(review): presumably installed as the superblock's handler table
 * elsewhere in ext3 -- confirm against the mount code.
 */
114const struct xattr_handler *ext3_xattr_handlers[] = {
115	&ext3_xattr_user_handler,
116	&ext3_xattr_trusted_handler,
117#ifdef CONFIG_EXT3_FS_POSIX_ACL
118	&posix_acl_access_xattr_handler,
119	&posix_acl_default_xattr_handler,
120#endif
121#ifdef CONFIG_EXT3_FS_SECURITY
122	&ext3_xattr_security_handler,
123#endif
	/* End-of-list marker. */
124	NULL
125};
126
127static inline const struct xattr_handler *
128ext3_xattr_handler(int name_index)
129{
130	const struct xattr_handler *handler = NULL;
131
132	if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map))
133		handler = ext3_xattr_handler_map[name_index];
134	return handler;
135}
136
137/*
138 * Inode operation listxattr()
139 *
140 * d_inode(dentry)->i_mutex: don't care
141 */
142ssize_t
143ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
144{
145	return ext3_xattr_list(dentry, buffer, size);
146}
147
148static int
149ext3_xattr_check_names(struct ext3_xattr_entry *entry, void *end)
150{
151	while (!IS_LAST_ENTRY(entry)) {
152		struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(entry);
153		if ((void *)next >= end)
154			return -EIO;
155		entry = next;
156	}
157	return 0;
158}
159
160static inline int
161ext3_xattr_check_block(struct buffer_head *bh)
162{
163	int error;
164
165	if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
166	    BHDR(bh)->h_blocks != cpu_to_le32(1))
167		return -EIO;
168	error = ext3_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
169	return error;
170}
171
172static inline int
173ext3_xattr_check_entry(struct ext3_xattr_entry *entry, size_t size)
174{
175	size_t value_size = le32_to_cpu(entry->e_value_size);
176
177	if (entry->e_value_block != 0 || value_size > size ||
178	    le16_to_cpu(entry->e_value_offs) + value_size > size)
179		return -EIO;
180	return 0;
181}
182
183static int
184ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index,
185		      const char *name, size_t size, int sorted)
186{
187	struct ext3_xattr_entry *entry;
188	size_t name_len;
189	int cmp = 1;
190
191	if (name == NULL)
192		return -EINVAL;
193	name_len = strlen(name);
194	entry = *pentry;
195	for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
196		cmp = name_index - entry->e_name_index;
197		if (!cmp)
198			cmp = name_len - entry->e_name_len;
199		if (!cmp)
200			cmp = memcmp(name, entry->e_name, name_len);
201		if (cmp <= 0 && (sorted || cmp == 0))
202			break;
203	}
204	*pentry = entry;
205	if (!cmp && ext3_xattr_check_entry(entry, size))
206			return -EIO;
207	return cmp ? -ENODATA : 0;
208}
209
210static int
211ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
212		     void *buffer, size_t buffer_size)
213{
214	struct buffer_head *bh = NULL;
215	struct ext3_xattr_entry *entry;
216	size_t size;
217	int error;
218
219	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
220		  name_index, name, buffer, (long)buffer_size);
221
222	error = -ENODATA;
223	if (!EXT3_I(inode)->i_file_acl)
224		goto cleanup;
225	ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
226	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
227	if (!bh)
228		goto cleanup;
229	ea_bdebug(bh, "b_count=%d, refcount=%d",
230		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
231	if (ext3_xattr_check_block(bh)) {
232bad_block:	ext3_error(inode->i_sb, __func__,
233			   "inode %lu: bad block "E3FSBLK, inode->i_ino,
234			   EXT3_I(inode)->i_file_acl);
235		error = -EIO;
236		goto cleanup;
237	}
238	ext3_xattr_cache_insert(bh);
239	entry = BFIRST(bh);
240	error = ext3_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
241	if (error == -EIO)
242		goto bad_block;
243	if (error)
244		goto cleanup;
245	size = le32_to_cpu(entry->e_value_size);
246	if (buffer) {
247		error = -ERANGE;
248		if (size > buffer_size)
249			goto cleanup;
250		memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
251		       size);
252	}
253	error = size;
254
255cleanup:
256	brelse(bh);
257	return error;
258}
259
260static int
261ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
262		     void *buffer, size_t buffer_size)
263{
264	struct ext3_xattr_ibody_header *header;
265	struct ext3_xattr_entry *entry;
266	struct ext3_inode *raw_inode;
267	struct ext3_iloc iloc;
268	size_t size;
269	void *end;
270	int error;
271
272	if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR))
273		return -ENODATA;
274	error = ext3_get_inode_loc(inode, &iloc);
275	if (error)
276		return error;
277	raw_inode = ext3_raw_inode(&iloc);
278	header = IHDR(inode, raw_inode);
279	entry = IFIRST(header);
280	end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
281	error = ext3_xattr_check_names(entry, end);
282	if (error)
283		goto cleanup;
284	error = ext3_xattr_find_entry(&entry, name_index, name,
285				      end - (void *)entry, 0);
286	if (error)
287		goto cleanup;
288	size = le32_to_cpu(entry->e_value_size);
289	if (buffer) {
290		error = -ERANGE;
291		if (size > buffer_size)
292			goto cleanup;
293		memcpy(buffer, (void *)IFIRST(header) +
294		       le16_to_cpu(entry->e_value_offs), size);
295	}
296	error = size;
297
298cleanup:
299	brelse(iloc.bh);
300	return error;
301}
302
303/*
304 * ext3_xattr_get()
305 *
306 * Copy an extended attribute into the buffer
307 * provided, or compute the buffer size required.
308 * Buffer is NULL to compute the size of the buffer required.
309 *
310 * Returns a negative error number on failure, or the number of bytes
311 * used / required on success.
312 */
313int
314ext3_xattr_get(struct inode *inode, int name_index, const char *name,
315	       void *buffer, size_t buffer_size)
316{
317	int error;
318
319	down_read(&EXT3_I(inode)->xattr_sem);
320	error = ext3_xattr_ibody_get(inode, name_index, name, buffer,
321				     buffer_size);
322	if (error == -ENODATA)
323		error = ext3_xattr_block_get(inode, name_index, name, buffer,
324					     buffer_size);
325	up_read(&EXT3_I(inode)->xattr_sem);
326	return error;
327}
328
329static int
330ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry,
331			char *buffer, size_t buffer_size)
332{
333	size_t rest = buffer_size;
334
335	for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
336		const struct xattr_handler *handler =
337			ext3_xattr_handler(entry->e_name_index);
338
339		if (handler) {
340			size_t size = handler->list(dentry, buffer, rest,
341						    entry->e_name,
342						    entry->e_name_len,
343						    handler->flags);
344			if (buffer) {
345				if (size > rest)
346					return -ERANGE;
347				buffer += size;
348			}
349			rest -= size;
350		}
351	}
352	return buffer_size - rest;
353}
354
355static int
356ext3_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
357{
358	struct inode *inode = d_inode(dentry);
359	struct buffer_head *bh = NULL;
360	int error;
361
362	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
363		  buffer, (long)buffer_size);
364
365	error = 0;
366	if (!EXT3_I(inode)->i_file_acl)
367		goto cleanup;
368	ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
369	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
370	error = -EIO;
371	if (!bh)
372		goto cleanup;
373	ea_bdebug(bh, "b_count=%d, refcount=%d",
374		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
375	if (ext3_xattr_check_block(bh)) {
376		ext3_error(inode->i_sb, __func__,
377			   "inode %lu: bad block "E3FSBLK, inode->i_ino,
378			   EXT3_I(inode)->i_file_acl);
379		error = -EIO;
380		goto cleanup;
381	}
382	ext3_xattr_cache_insert(bh);
383	error = ext3_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
384
385cleanup:
386	brelse(bh);
387
388	return error;
389}
390
391static int
392ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
393{
394	struct inode *inode = d_inode(dentry);
395	struct ext3_xattr_ibody_header *header;
396	struct ext3_inode *raw_inode;
397	struct ext3_iloc iloc;
398	void *end;
399	int error;
400
401	if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR))
402		return 0;
403	error = ext3_get_inode_loc(inode, &iloc);
404	if (error)
405		return error;
406	raw_inode = ext3_raw_inode(&iloc);
407	header = IHDR(inode, raw_inode);
408	end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
409	error = ext3_xattr_check_names(IFIRST(header), end);
410	if (error)
411		goto cleanup;
412	error = ext3_xattr_list_entries(dentry, IFIRST(header),
413					buffer, buffer_size);
414
415cleanup:
416	brelse(iloc.bh);
417	return error;
418}
419
420/*
421 * ext3_xattr_list()
422 *
423 * Copy a list of attribute names into the buffer
424 * provided, or compute the buffer size required.
425 * Buffer is NULL to compute the size of the buffer required.
426 *
427 * Returns a negative error number on failure, or the number of bytes
428 * used / required on success.
429 */
430static int
431ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
432{
433	int i_error, b_error;
434
435	down_read(&EXT3_I(d_inode(dentry))->xattr_sem);
436	i_error = ext3_xattr_ibody_list(dentry, buffer, buffer_size);
437	if (i_error < 0) {
438		b_error = 0;
439	} else {
440		if (buffer) {
441			buffer += i_error;
442			buffer_size -= i_error;
443		}
444		b_error = ext3_xattr_block_list(dentry, buffer, buffer_size);
445		if (b_error < 0)
446			i_error = 0;
447	}
448	up_read(&EXT3_I(d_inode(dentry))->xattr_sem);
449	return i_error + b_error;
450}
451
452/*
453 * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
454 * not set, set it.
455 */
456static void ext3_xattr_update_super_block(handle_t *handle,
457					  struct super_block *sb)
458{
459	if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR))
460		return;
461
462	if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
463		EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR);
464		ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
465	}
466}
467
468/*
469 * Release the xattr block BH: If the reference count is > 1, decrement
470 * it; otherwise free the block.
471 */
472static void
473ext3_xattr_release_block(handle_t *handle, struct inode *inode,
474			 struct buffer_head *bh)
475{
476	struct mb_cache_entry *ce = NULL;
477	int error = 0;
478
479	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
480	error = ext3_journal_get_write_access(handle, bh);
481	if (error)
482		 goto out;
483
484	lock_buffer(bh);
485
486	if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
487		ea_bdebug(bh, "refcount now=0; freeing");
488		if (ce)
489			mb_cache_entry_free(ce);
490		ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
491		get_bh(bh);
492		ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
493	} else {
494		le32_add_cpu(&BHDR(bh)->h_refcount, -1);
495		error = ext3_journal_dirty_metadata(handle, bh);
496		if (IS_SYNC(inode))
497			handle->h_sync = 1;
498		dquot_free_block(inode, 1);
499		ea_bdebug(bh, "refcount now=%d; releasing",
500			  le32_to_cpu(BHDR(bh)->h_refcount));
501		if (ce)
502			mb_cache_entry_release(ce);
503	}
504	unlock_buffer(bh);
505out:
506	ext3_std_error(inode->i_sb, error);
507	return;
508}
509
/* The attribute operation requested by the caller of the set path. */
510struct ext3_xattr_info {
	/* Namespace index stored in an entry's e_name_index. */
511	int name_index;
	/* NUL-terminated attribute name (its length is taken with strlen). */
512	const char *name;
	/* New value; NULL requests removal of the attribute. */
513	const void *value;
	/* Length of value in bytes. */
514	size_t value_len;
515};
516
/* Position of a lookup inside one attribute storage area. */
517struct ext3_xattr_search {
	/* First entry descriptor of the area. */
518	struct ext3_xattr_entry *first;
	/* Address that value offsets (e_value_offs) are relative to. */
519	void *base;
	/* One past the last byte of the area. */
520	void *end;
	/* Entry the search stopped at: the match, or the insertion point. */
521	struct ext3_xattr_entry *here;
	/* 0 if the attribute was found, -ENODATA otherwise. */
522	int not_found;
523};
524
525static int
526ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
527{
528	struct ext3_xattr_entry *last;
529	size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
530
531	/* Compute min_offs and last. */
532	last = s->first;
533	for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
534		if (!last->e_value_block && last->e_value_size) {
535			size_t offs = le16_to_cpu(last->e_value_offs);
536			if (offs < min_offs)
537				min_offs = offs;
538		}
539	}
540	free = min_offs - ((void *)last - s->base) - sizeof(__u32);
541	if (!s->not_found) {
542		if (!s->here->e_value_block && s->here->e_value_size) {
543			size_t size = le32_to_cpu(s->here->e_value_size);
544			free += EXT3_XATTR_SIZE(size);
545		}
546		free += EXT3_XATTR_LEN(name_len);
547	}
548	if (i->value) {
549		if (free < EXT3_XATTR_LEN(name_len) +
550			   EXT3_XATTR_SIZE(i->value_len))
551			return -ENOSPC;
552	}
553
554	if (i->value && s->not_found) {
555		/* Insert the new name. */
556		size_t size = EXT3_XATTR_LEN(name_len);
557		size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
558		memmove((void *)s->here + size, s->here, rest);
559		memset(s->here, 0, size);
560		s->here->e_name_index = i->name_index;
561		s->here->e_name_len = name_len;
562		memcpy(s->here->e_name, i->name, name_len);
563	} else {
564		if (!s->here->e_value_block && s->here->e_value_size) {
565			void *first_val = s->base + min_offs;
566			size_t offs = le16_to_cpu(s->here->e_value_offs);
567			void *val = s->base + offs;
568			size_t size = EXT3_XATTR_SIZE(
569				le32_to_cpu(s->here->e_value_size));
570
571			if (i->value && size == EXT3_XATTR_SIZE(i->value_len)) {
572				/* The old and the new value have the same
573				   size. Just replace. */
574				s->here->e_value_size =
575					cpu_to_le32(i->value_len);
576				memset(val + size - EXT3_XATTR_PAD, 0,
577				       EXT3_XATTR_PAD); /* Clear pad bytes. */
578				memcpy(val, i->value, i->value_len);
579				return 0;
580			}
581
582			/* Remove the old value. */
583			memmove(first_val + size, first_val, val - first_val);
584			memset(first_val, 0, size);
585			s->here->e_value_size = 0;
586			s->here->e_value_offs = 0;
587			min_offs += size;
588
589			/* Adjust all value offsets. */
590			last = s->first;
591			while (!IS_LAST_ENTRY(last)) {
592				size_t o = le16_to_cpu(last->e_value_offs);
593				if (!last->e_value_block &&
594				    last->e_value_size && o < offs)
595					last->e_value_offs =
596						cpu_to_le16(o + size);
597				last = EXT3_XATTR_NEXT(last);
598			}
599		}
600		if (!i->value) {
601			/* Remove the old name. */
602			size_t size = EXT3_XATTR_LEN(name_len);
603			last = ENTRY((void *)last - size);
604			memmove(s->here, (void *)s->here + size,
605				(void *)last - (void *)s->here + sizeof(__u32));
606			memset(last, 0, size);
607		}
608	}
609
610	if (i->value) {
611		/* Insert the new value. */
612		s->here->e_value_size = cpu_to_le32(i->value_len);
613		if (i->value_len) {
614			size_t size = EXT3_XATTR_SIZE(i->value_len);
615			void *val = s->base + min_offs - size;
616			s->here->e_value_offs = cpu_to_le16(min_offs - size);
617			memset(val + size - EXT3_XATTR_PAD, 0,
618			       EXT3_XATTR_PAD); /* Clear the pad bytes. */
619			memcpy(val, i->value, i->value_len);
620		}
621	}
622	return 0;
623}
624
/* Result of searching the inode's external xattr block. */
625struct ext3_xattr_block_find {
	/* Search position inside the block. */
626	struct ext3_xattr_search s;
	/* Buffer holding the xattr block; NULL if none has been read. */
627	struct buffer_head *bh;
628};
629
630static int
631ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
632		      struct ext3_xattr_block_find *bs)
633{
634	struct super_block *sb = inode->i_sb;
635	int error;
636
637	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
638		  i->name_index, i->name, i->value, (long)i->value_len);
639
640	if (EXT3_I(inode)->i_file_acl) {
641		/* The inode already has an extended attribute block. */
642		bs->bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
643		error = -EIO;
644		if (!bs->bh)
645			goto cleanup;
646		ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
647			atomic_read(&(bs->bh->b_count)),
648			le32_to_cpu(BHDR(bs->bh)->h_refcount));
649		if (ext3_xattr_check_block(bs->bh)) {
650			ext3_error(sb, __func__,
651				"inode %lu: bad block "E3FSBLK, inode->i_ino,
652				EXT3_I(inode)->i_file_acl);
653			error = -EIO;
654			goto cleanup;
655		}
656		/* Find the named attribute. */
657		bs->s.base = BHDR(bs->bh);
658		bs->s.first = BFIRST(bs->bh);
659		bs->s.end = bs->bh->b_data + bs->bh->b_size;
660		bs->s.here = bs->s.first;
661		error = ext3_xattr_find_entry(&bs->s.here, i->name_index,
662					      i->name, bs->bh->b_size, 1);
663		if (error && error != -ENODATA)
664			goto cleanup;
665		bs->s.not_found = error;
666	}
667	error = 0;
668
669cleanup:
670	return error;
671}
672
673static int
674ext3_xattr_block_set(handle_t *handle, struct inode *inode,
675		     struct ext3_xattr_info *i,
676		     struct ext3_xattr_block_find *bs)
677{
678	struct super_block *sb = inode->i_sb;
679	struct buffer_head *new_bh = NULL;
680	struct ext3_xattr_search *s = &bs->s;
681	struct mb_cache_entry *ce = NULL;
682	int error = 0;
683
684#define header(x) ((struct ext3_xattr_header *)(x))
685
686	if (i->value && i->value_len > sb->s_blocksize)
687		return -ENOSPC;
688	if (s->base) {
689		ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev,
690					bs->bh->b_blocknr);
691		error = ext3_journal_get_write_access(handle, bs->bh);
692		if (error)
693			goto cleanup;
694		lock_buffer(bs->bh);
695
696		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
697			if (ce) {
698				mb_cache_entry_free(ce);
699				ce = NULL;
700			}
701			ea_bdebug(bs->bh, "modifying in-place");
702			error = ext3_xattr_set_entry(i, s);
703			if (!error) {
704				if (!IS_LAST_ENTRY(s->first))
705					ext3_xattr_rehash(header(s->base),
706							  s->here);
707				ext3_xattr_cache_insert(bs->bh);
708			}
709			unlock_buffer(bs->bh);
710			if (error == -EIO)
711				goto bad_block;
712			if (!error)
713				error = ext3_journal_dirty_metadata(handle,
714								    bs->bh);
715			if (error)
716				goto cleanup;
717			goto inserted;
718		} else {
719			int offset = (char *)s->here - bs->bh->b_data;
720
721			unlock_buffer(bs->bh);
722			journal_release_buffer(handle, bs->bh);
723
724			if (ce) {
725				mb_cache_entry_release(ce);
726				ce = NULL;
727			}
728			ea_bdebug(bs->bh, "cloning");
729			s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
730			error = -ENOMEM;
731			if (s->base == NULL)
732				goto cleanup;
733			memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
734			s->first = ENTRY(header(s->base)+1);
735			header(s->base)->h_refcount = cpu_to_le32(1);
736			s->here = ENTRY(s->base + offset);
737			s->end = s->base + bs->bh->b_size;
738		}
739	} else {
740		/* Allocate a buffer where we construct the new block. */
741		s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
742		/* assert(header == s->base) */
743		error = -ENOMEM;
744		if (s->base == NULL)
745			goto cleanup;
746		header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
747		header(s->base)->h_blocks = cpu_to_le32(1);
748		header(s->base)->h_refcount = cpu_to_le32(1);
749		s->first = ENTRY(header(s->base)+1);
750		s->here = ENTRY(header(s->base)+1);
751		s->end = s->base + sb->s_blocksize;
752	}
753
754	error = ext3_xattr_set_entry(i, s);
755	if (error == -EIO)
756		goto bad_block;
757	if (error)
758		goto cleanup;
759	if (!IS_LAST_ENTRY(s->first))
760		ext3_xattr_rehash(header(s->base), s->here);
761
762inserted:
763	if (!IS_LAST_ENTRY(s->first)) {
764		new_bh = ext3_xattr_cache_find(inode, header(s->base), &ce);
765		if (new_bh) {
766			/* We found an identical block in the cache. */
767			if (new_bh == bs->bh)
768				ea_bdebug(new_bh, "keeping");
769			else {
770				/* The old block is released after updating
771				   the inode. */
772				error = dquot_alloc_block(inode, 1);
773				if (error)
774					goto cleanup;
775				error = ext3_journal_get_write_access(handle,
776								      new_bh);
777				if (error)
778					goto cleanup_dquot;
779				lock_buffer(new_bh);
780				le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
781				ea_bdebug(new_bh, "reusing; refcount now=%d",
782					le32_to_cpu(BHDR(new_bh)->h_refcount));
783				unlock_buffer(new_bh);
784				error = ext3_journal_dirty_metadata(handle,
785								    new_bh);
786				if (error)
787					goto cleanup_dquot;
788			}
789			mb_cache_entry_release(ce);
790			ce = NULL;
791		} else if (bs->bh && s->base == bs->bh->b_data) {
792			/* We were modifying this block in-place. */
793			ea_bdebug(bs->bh, "keeping this block");
794			new_bh = bs->bh;
795			get_bh(new_bh);
796		} else {
797			/* We need to allocate a new block */
798			ext3_fsblk_t goal = ext3_group_first_block_no(sb,
799						EXT3_I(inode)->i_block_group);
800			ext3_fsblk_t block;
801
802			/*
803			 * Protect us agaist concurrent allocations to the
804			 * same inode from ext3_..._writepage(). Reservation
805			 * code does not expect racing allocations.
806			 */
807			mutex_lock(&EXT3_I(inode)->truncate_mutex);
808			block = ext3_new_block(handle, inode, goal, &error);
809			mutex_unlock(&EXT3_I(inode)->truncate_mutex);
810			if (error)
811				goto cleanup;
812			ea_idebug(inode, "creating block %d", block);
813
814			new_bh = sb_getblk(sb, block);
815			if (unlikely(!new_bh)) {
816getblk_failed:
817				ext3_free_blocks(handle, inode, block, 1);
818				error = -ENOMEM;
819				goto cleanup;
820			}
821			lock_buffer(new_bh);
822			error = ext3_journal_get_create_access(handle, new_bh);
823			if (error) {
824				unlock_buffer(new_bh);
825				goto getblk_failed;
826			}
827			memcpy(new_bh->b_data, s->base, new_bh->b_size);
828			set_buffer_uptodate(new_bh);
829			unlock_buffer(new_bh);
830			ext3_xattr_cache_insert(new_bh);
831			error = ext3_journal_dirty_metadata(handle, new_bh);
832			if (error)
833				goto cleanup;
834		}
835	}
836
837	/* Update the inode. */
838	EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
839
840	/* Drop the previous xattr block. */
841	if (bs->bh && bs->bh != new_bh)
842		ext3_xattr_release_block(handle, inode, bs->bh);
843	error = 0;
844
845cleanup:
846	if (ce)
847		mb_cache_entry_release(ce);
848	brelse(new_bh);
849	if (!(bs->bh && s->base == bs->bh->b_data))
850		kfree(s->base);
851
852	return error;
853
854cleanup_dquot:
855	dquot_free_block(inode, 1);
856	goto cleanup;
857
858bad_block:
859	ext3_error(inode->i_sb, __func__,
860		   "inode %lu: bad block "E3FSBLK, inode->i_ino,
861		   EXT3_I(inode)->i_file_acl);
862	goto cleanup;
863
864#undef header
865}
866
/* Result of searching the attributes stored in the inode body. */
867struct ext3_xattr_ibody_find {
	/* Search position inside the in-inode attribute area. */
868	struct ext3_xattr_search s;
	/* Location of the on-disk inode, as filled in by ext3_get_inode_loc(). */
869	struct ext3_iloc iloc;
870};
871
872static int
873ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i,
874		      struct ext3_xattr_ibody_find *is)
875{
876	struct ext3_xattr_ibody_header *header;
877	struct ext3_inode *raw_inode;
878	int error;
879
880	if (EXT3_I(inode)->i_extra_isize == 0)
881		return 0;
882	raw_inode = ext3_raw_inode(&is->iloc);
883	header = IHDR(inode, raw_inode);
884	is->s.base = is->s.first = IFIRST(header);
885	is->s.here = is->s.first;
886	is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
887	if (ext3_test_inode_state(inode, EXT3_STATE_XATTR)) {
888		error = ext3_xattr_check_names(IFIRST(header), is->s.end);
889		if (error)
890			return error;
891		/* Find the named attribute. */
892		error = ext3_xattr_find_entry(&is->s.here, i->name_index,
893					      i->name, is->s.end -
894					      (void *)is->s.base, 0);
895		if (error && error != -ENODATA)
896			return error;
897		is->s.not_found = error;
898	}
899	return 0;
900}
901
902static int
903ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
904		     struct ext3_xattr_info *i,
905		     struct ext3_xattr_ibody_find *is)
906{
907	struct ext3_xattr_ibody_header *header;
908	struct ext3_xattr_search *s = &is->s;
909	int error;
910
911	if (EXT3_I(inode)->i_extra_isize == 0)
912		return -ENOSPC;
913	error = ext3_xattr_set_entry(i, s);
914	if (error)
915		return error;
916	header = IHDR(inode, ext3_raw_inode(&is->iloc));
917	if (!IS_LAST_ENTRY(s->first)) {
918		header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
919		ext3_set_inode_state(inode, EXT3_STATE_XATTR);
920	} else {
921		header->h_magic = cpu_to_le32(0);
922		ext3_clear_inode_state(inode, EXT3_STATE_XATTR);
923	}
924	return 0;
925}
926
927/*
928 * ext3_xattr_set_handle()
929 *
930 * Create, replace or remove an extended attribute for this inode.  Value
931 * is NULL to remove an existing extended attribute, and non-NULL to
932 * either replace an existing extended attribute, or create a new extended
933 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
934 * specify that an extended attribute must exist and must not exist
935 * previous to the call, respectively.
936 *
937 * Returns 0, or a negative error number on failure.
938 */
939int
940ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
941		      const char *name, const void *value, size_t value_len,
942		      int flags)
943{
944	struct ext3_xattr_info i = {
945		.name_index = name_index,
946		.name = name,
947		.value = value,
948		.value_len = value_len,
949
950	};
	/* Both searches start out as "not found" until a lookup says otherwise. */
951	struct ext3_xattr_ibody_find is = {
952		.s = { .not_found = -ENODATA, },
953	};
954	struct ext3_xattr_block_find bs = {
955		.s = { .not_found = -ENODATA, },
956	};
957	int error;
958
959	if (!name)
960		return -EINVAL;
961	if (strlen(name) > 255)
962		return -ERANGE;
	/* Writers hold xattr_sem exclusively for the whole update. */
963	down_write(&EXT3_I(inode)->xattr_sem);
964	error = ext3_get_inode_loc(inode, &is.iloc);
965	if (error)
966		goto cleanup;
967
968	error = ext3_journal_get_write_access(handle, is.iloc.bh);
969	if (error)
970		goto cleanup;
971
	/* For an inode in EXT3_STATE_NEW, zero its whole on-disk image and
	   clear that state before looking for attributes. */
972	if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) {
973		struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc);
974		memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
975		ext3_clear_inode_state(inode, EXT3_STATE_NEW);
976	}
977
	/* Look in the inode body first; consult the xattr block only if the
	   attribute was not found there. */
978	error = ext3_xattr_ibody_find(inode, &i, &is);
979	if (error)
980		goto cleanup;
981	if (is.s.not_found)
982		error = ext3_xattr_block_find(inode, &i, &bs);
983	if (error)
984		goto cleanup;
	/* Enforce XATTR_REPLACE / XATTR_CREATE; removing an attribute that
	   does not exist is a successful no-op. */
985	if (is.s.not_found && bs.s.not_found) {
986		error = -ENODATA;
987		if (flags & XATTR_REPLACE)
988			goto cleanup;
989		error = 0;
990		if (!value)
991			goto cleanup;
992	} else {
993		error = -EEXIST;
994		if (flags & XATTR_CREATE)
995			goto cleanup;
996	}
997	if (!value) {
		/* Removal: delete from wherever the attribute was found. */
998		if (!is.s.not_found)
999			error = ext3_xattr_ibody_set(handle, inode, &i, &is);
1000		else if (!bs.s.not_found)
1001			error = ext3_xattr_block_set(handle, inode, &i, &bs);
1002	} else {
		/* Set: try the inode body first. */
1003		error = ext3_xattr_ibody_set(handle, inode, &i, &is);
1004		if (!error && !bs.s.not_found) {
			/* Stored in the inode; remove the old copy from the
			   block (a NULL value turns the set into a removal). */
1005			i.value = NULL;
1006			error = ext3_xattr_block_set(handle, inode, &i, &bs);
1007		} else if (error == -ENOSPC) {
			/* No room in the inode: fall back to the block.  The
			   block has not been searched yet if the attribute
			   was found in the inode body. */
1008			if (EXT3_I(inode)->i_file_acl && !bs.s.base) {
1009				error = ext3_xattr_block_find(inode, &i, &bs);
1010				if (error)
1011					goto cleanup;
1012			}
1013			error = ext3_xattr_block_set(handle, inode, &i, &bs);
1014			if (error)
1015				goto cleanup;
			/* Now stored in the block; remove the old in-inode
			   copy if there was one. */
1016			if (!is.s.not_found) {
1017				i.value = NULL;
1018				error = ext3_xattr_ibody_set(handle, inode, &i,
1019							     &is);
1020			}
1021		}
1022	}
1023	if (!error) {
1024		ext3_xattr_update_super_block(handle, inode->i_sb);
1025		inode->i_ctime = CURRENT_TIME_SEC;
1026		error = ext3_mark_iloc_dirty(handle, inode, &is.iloc);
1027		/*
1028		 * The bh is consumed by ext3_mark_iloc_dirty, even with
1029		 * error != 0.
1030		 */
1031		is.iloc.bh = NULL;
1032		if (IS_SYNC(inode))
1033			handle->h_sync = 1;
1034	}
1035
1036cleanup:
1037	brelse(is.iloc.bh);
1038	brelse(bs.bh);
1039	up_write(&EXT3_I(inode)->xattr_sem);
1040	return error;
1041}
1042
1043/*
1044 * ext3_xattr_set()
1045 *
1046 * Like ext3_xattr_set_handle, but start from an inode. This extended
1047 * attribute modification is a filesystem transaction by itself.
1048 *
1049 * Returns 0, or a negative error number on failure.
1050 */
1051int
1052ext3_xattr_set(struct inode *inode, int name_index, const char *name,
1053	       const void *value, size_t value_len, int flags)
1054{
1055	handle_t *handle;
1056	int error, retries = 0;
1057
1058retry:
1059	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
1060	if (IS_ERR(handle)) {
1061		error = PTR_ERR(handle);
1062	} else {
1063		int error2;
1064
1065		error = ext3_xattr_set_handle(handle, inode, name_index, name,
1066					      value, value_len, flags);
1067		error2 = ext3_journal_stop(handle);
1068		if (error == -ENOSPC &&
1069		    ext3_should_retry_alloc(inode->i_sb, &retries))
1070			goto retry;
1071		if (error == 0)
1072			error = error2;
1073	}
1074
1075	return error;
1076}
1077
1078/*
1079 * ext3_xattr_delete_inode()
1080 *
1081 * Free extended attribute resources associated with this inode. This
1082 * is called immediately before an inode is freed. We have exclusive
1083 * access to the inode.
1084 */
1085void
1086ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
1087{
1088	struct buffer_head *bh = NULL;
1089
1090	if (!EXT3_I(inode)->i_file_acl)
1091		goto cleanup;
1092	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
1093	if (!bh) {
1094		ext3_error(inode->i_sb, __func__,
1095			"inode %lu: block "E3FSBLK" read error", inode->i_ino,
1096			EXT3_I(inode)->i_file_acl);
1097		goto cleanup;
1098	}
1099	if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
1100	    BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1101		ext3_error(inode->i_sb, __func__,
1102			"inode %lu: bad block "E3FSBLK, inode->i_ino,
1103			EXT3_I(inode)->i_file_acl);
1104		goto cleanup;
1105	}
1106	ext3_xattr_release_block(handle, inode, bh);
1107	EXT3_I(inode)->i_file_acl = 0;
1108
1109cleanup:
1110	brelse(bh);
1111}
1112
1113/*
1114 * ext3_xattr_put_super()
1115 *
1116 * This is called when a file system is unmounted.
1117 */
1118void
1119ext3_xattr_put_super(struct super_block *sb)
1120{
1121	mb_cache_shrink(sb->s_bdev);
1122}
1123
1124/*
1125 * ext3_xattr_cache_insert()
1126 *
1127 * Create a new entry in the extended attribute cache, and insert
1128 * it unless such an entry is already in the cache.
1129 *
1130 * Returns 0, or a negative error number on failure.
1131 */
1132static void
1133ext3_xattr_cache_insert(struct buffer_head *bh)
1134{
1135	__u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
1136	struct mb_cache_entry *ce;
1137	int error;
1138
1139	ce = mb_cache_entry_alloc(ext3_xattr_cache, GFP_NOFS);
1140	if (!ce) {
1141		ea_bdebug(bh, "out of memory");
1142		return;
1143	}
1144	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
1145	if (error) {
1146		mb_cache_entry_free(ce);
1147		if (error == -EBUSY) {
1148			ea_bdebug(bh, "already in cache");
1149			error = 0;
1150		}
1151	} else {
1152		ea_bdebug(bh, "inserting [%x]", (int)hash);
1153		mb_cache_entry_release(ce);
1154	}
1155}
1156
1157/*
1158 * ext3_xattr_cmp()
1159 *
1160 * Compare two extended attribute blocks for equality.
1161 *
1162 * Returns 0 if the blocks are equal, 1 if they differ, and
1163 * a negative error number on errors.
1164 */
1165static int
1166ext3_xattr_cmp(struct ext3_xattr_header *header1,
1167	       struct ext3_xattr_header *header2)
1168{
1169	struct ext3_xattr_entry *entry1, *entry2;
1170
1171	entry1 = ENTRY(header1+1);
1172	entry2 = ENTRY(header2+1);
1173	while (!IS_LAST_ENTRY(entry1)) {
1174		if (IS_LAST_ENTRY(entry2))
1175			return 1;
1176		if (entry1->e_hash != entry2->e_hash ||
1177		    entry1->e_name_index != entry2->e_name_index ||
1178		    entry1->e_name_len != entry2->e_name_len ||
1179		    entry1->e_value_size != entry2->e_value_size ||
1180		    memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
1181			return 1;
1182		if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
1183			return -EIO;
1184		if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
1185			   (char *)header2 + le16_to_cpu(entry2->e_value_offs),
1186			   le32_to_cpu(entry1->e_value_size)))
1187			return 1;
1188
1189		entry1 = EXT3_XATTR_NEXT(entry1);
1190		entry2 = EXT3_XATTR_NEXT(entry2);
1191	}
1192	if (!IS_LAST_ENTRY(entry2))
1193		return 1;
1194	return 0;
1195}
1196
1197/*
1198 * ext3_xattr_cache_find()
1199 *
1200 * Find an identical extended attribute block.
1201 *
1202 * Returns a pointer to the block found, or NULL if such a block was
1203 * not found or an error occurred.
1204 */
1205static struct buffer_head *
1206ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
1207		      struct mb_cache_entry **pce)
1208{
1209	__u32 hash = le32_to_cpu(header->h_hash);
1210	struct mb_cache_entry *ce;
1211
1212	if (!header->h_hash)
1213		return NULL;  /* never share */
1214	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1215again:
1216	ce = mb_cache_entry_find_first(ext3_xattr_cache, inode->i_sb->s_bdev,
1217				       hash);
1218	while (ce) {
1219		struct buffer_head *bh;
1220
1221		if (IS_ERR(ce)) {
1222			if (PTR_ERR(ce) == -EAGAIN)
1223				goto again;
1224			break;
1225		}
1226		bh = sb_bread(inode->i_sb, ce->e_block);
1227		if (!bh) {
1228			ext3_error(inode->i_sb, __func__,
1229				"inode %lu: block %lu read error",
1230				inode->i_ino, (unsigned long) ce->e_block);
1231		} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
1232				EXT3_XATTR_REFCOUNT_MAX) {
1233			ea_idebug(inode, "block %lu refcount %d>=%d",
1234				  (unsigned long) ce->e_block,
1235				  le32_to_cpu(BHDR(bh)->h_refcount),
1236					  EXT3_XATTR_REFCOUNT_MAX);
1237		} else if (ext3_xattr_cmp(header, BHDR(bh)) == 0) {
1238			*pce = ce;
1239			return bh;
1240		}
1241		brelse(bh);
1242		ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
1243	}
1244	return NULL;
1245}
1246
1247#define NAME_HASH_SHIFT 5
1248#define VALUE_HASH_SHIFT 16
1249
1250/*
1251 * ext3_xattr_hash_entry()
1252 *
1253 * Compute the hash of an extended attribute.
1254 */
1255static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header,
1256					 struct ext3_xattr_entry *entry)
1257{
1258	__u32 hash = 0;
1259	char *name = entry->e_name;
1260	int n;
1261
1262	for (n=0; n < entry->e_name_len; n++) {
1263		hash = (hash << NAME_HASH_SHIFT) ^
1264		       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1265		       *name++;
1266	}
1267
1268	if (entry->e_value_block == 0 && entry->e_value_size != 0) {
1269		__le32 *value = (__le32 *)((char *)header +
1270			le16_to_cpu(entry->e_value_offs));
1271		for (n = (le32_to_cpu(entry->e_value_size) +
1272		     EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) {
1273			hash = (hash << VALUE_HASH_SHIFT) ^
1274			       (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1275			       le32_to_cpu(*value++);
1276		}
1277	}
1278	entry->e_hash = cpu_to_le32(hash);
1279}
1280
1281#undef NAME_HASH_SHIFT
1282#undef VALUE_HASH_SHIFT
1283
1284#define BLOCK_HASH_SHIFT 16
1285
1286/*
1287 * ext3_xattr_rehash()
1288 *
1289 * Re-compute the extended attribute hash value after an entry has changed.
1290 */
1291static void ext3_xattr_rehash(struct ext3_xattr_header *header,
1292			      struct ext3_xattr_entry *entry)
1293{
1294	struct ext3_xattr_entry *here;
1295	__u32 hash = 0;
1296
1297	ext3_xattr_hash_entry(header, entry);
1298	here = ENTRY(header+1);
1299	while (!IS_LAST_ENTRY(here)) {
1300		if (!here->e_hash) {
1301			/* Block is not shared if an entry's hash value == 0 */
1302			hash = 0;
1303			break;
1304		}
1305		hash = (hash << BLOCK_HASH_SHIFT) ^
1306		       (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1307		       le32_to_cpu(here->e_hash);
1308		here = EXT3_XATTR_NEXT(here);
1309	}
1310	header->h_hash = cpu_to_le32(hash);
1311}
1312
1313#undef BLOCK_HASH_SHIFT
1314
1315int __init
1316init_ext3_xattr(void)
1317{
1318	ext3_xattr_cache = mb_cache_create("ext3_xattr", 6);
1319	if (!ext3_xattr_cache)
1320		return -ENOMEM;
1321	return 0;
1322}
1323
1324void
1325exit_ext3_xattr(void)
1326{
1327	if (ext3_xattr_cache)
1328		mb_cache_destroy(ext3_xattr_cache);
1329	ext3_xattr_cache = NULL;
1330}
1331