1/*
2  FUSE: Filesystem in Userspace
3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4
5  This program can be distributed under the terms of the GNU GPL.
6  See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/pagemap.h>
12#include <linux/file.h>
13#include <linux/sched.h>
14#include <linux/namei.h>
15#include <linux/slab.h>
16
17static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
18{
19	struct fuse_conn *fc = get_fuse_conn(dir);
20	struct fuse_inode *fi = get_fuse_inode(dir);
21
22	if (!fc->do_readdirplus)
23		return false;
24	if (!fc->readdirplus_auto)
25		return true;
26	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
27		return true;
28	if (ctx->pos == 0)
29		return true;
30	return false;
31}
32
33static void fuse_advise_use_readdirplus(struct inode *dir)
34{
35	struct fuse_inode *fi = get_fuse_inode(dir);
36
37	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
38}
39
40#if BITS_PER_LONG >= 64
41static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
42{
43	entry->d_time = time;
44}
45
46static inline u64 fuse_dentry_time(struct dentry *entry)
47{
48	return entry->d_time;
49}
50#else
51/*
52 * On 32 bit archs store the high 32 bits of time in d_fsdata
53 */
54static void fuse_dentry_settime(struct dentry *entry, u64 time)
55{
56	entry->d_time = time;
57	entry->d_fsdata = (void *) (unsigned long) (time >> 32);
58}
59
60static u64 fuse_dentry_time(struct dentry *entry)
61{
62	return (u64) entry->d_time +
63		((u64) (unsigned long) entry->d_fsdata << 32);
64}
65#endif
66
67/*
68 * FUSE caches dentries and attributes with separate timeout.  The
69 * time in jiffies until the dentry/attributes are valid is stored in
70 * dentry->d_time and fuse_inode->i_time respectively.
71 */
72
73/*
74 * Calculate the time in jiffies until a dentry/attributes are valid
75 */
76static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
77{
78	if (sec || nsec) {
79		struct timespec ts = {sec, nsec};
80		return get_jiffies_64() + timespec_to_jiffies(&ts);
81	} else
82		return 0;
83}
84
85/*
86 * Set dentry and possibly attribute timeouts from the lookup/mk*
87 * replies
88 */
89static void fuse_change_entry_timeout(struct dentry *entry,
90				      struct fuse_entry_out *o)
91{
92	fuse_dentry_settime(entry,
93		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
94}
95
96static u64 attr_timeout(struct fuse_attr_out *o)
97{
98	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
99}
100
101static u64 entry_attr_timeout(struct fuse_entry_out *o)
102{
103	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
104}
105
106/*
107 * Mark the attributes as stale, so that at the next call to
108 * ->getattr() they will be fetched from userspace
109 */
110void fuse_invalidate_attr(struct inode *inode)
111{
112	get_fuse_inode(inode)->i_time = 0;
113}
114
/*
 * Mark the attributes as stale because of an atime change.  The
 * invalidate is skipped when IS_RDONLY() is true for the inode, i.e.
 * when atime is not used.
 */
void fuse_invalidate_atime(struct inode *inode)
{
	if (IS_RDONLY(inode))
		return;

	fuse_invalidate_attr(inode);
}
124
125/*
126 * Just mark the entry as stale, so that a next attempt to look it up
127 * will result in a new lookup call to userspace
128 *
129 * This is called when a dentry is about to become negative and the
130 * timeout is unknown (unlink, rmdir, rename and in some cases
131 * lookup)
132 */
133void fuse_invalidate_entry_cache(struct dentry *entry)
134{
135	fuse_dentry_settime(entry, 0);
136}
137
/*
 * Call d_invalidate() on the dentry to try to remove it from the hash,
 * then zero its validity time exactly as fuse_invalidate_entry_cache()
 * does
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	fuse_dentry_settime(entry, 0);
}
147
148static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
149			     u64 nodeid, struct qstr *name,
150			     struct fuse_entry_out *outarg)
151{
152	memset(outarg, 0, sizeof(struct fuse_entry_out));
153	args->in.h.opcode = FUSE_LOOKUP;
154	args->in.h.nodeid = nodeid;
155	args->in.numargs = 1;
156	args->in.args[0].size = name->len + 1;
157	args->in.args[0].value = name->name;
158	args->out.numargs = 1;
159	args->out.args[0].size = sizeof(struct fuse_entry_out);
160	args->out.args[0].value = outarg;
161}
162
163u64 fuse_get_attr_version(struct fuse_conn *fc)
164{
165	u64 curr_version;
166
167	/*
168	 * The spin lock isn't actually needed on 64bit archs, but we
169	 * don't yet care too much about such optimizations.
170	 */
171	spin_lock(&fc->lock);
172	curr_version = fc->attr_version;
173	spin_unlock(&fc->lock);
174
175	return curr_version;
176}
177
/*
 * Check whether the dentry is still valid
 *
 * If the entry validity timeout has expired and the dentry is
 * positive, try to redo the lookup.  If the lookup results in a
 * different inode, then let the VFS invalidate the dentry and redo
 * the lookup once more.  If the lookup results in the same inode,
 * then refresh the attributes, timeouts and mark the dentry valid.
 *
 * Returns 1 if the dentry is valid and 0 if it is not.  Returns
 * -ECHILD if LOOKUP_RCU is set and either a lookup request would be
 * needed or FUSE_I_INIT_RDPLUS is set on the inode.  Returns -ENOMEM
 * if the forget link could not be allocated or the lookup request
 * itself returned -ENOMEM.
 */
static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
{
	struct inode *inode;
	struct dentry *parent;
	struct fuse_conn *fc;
	struct fuse_inode *fi;
	int ret;

	inode = d_inode_rcu(entry);
	if (inode && is_bad_inode(inode))
		goto invalid;
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & LOOKUP_REVAL)) {
		/* Timeout expired, or the caller asked for revalidation */
		struct fuse_entry_out outarg;
		FUSE_ARGS(args);
		struct fuse_forget_link *forget;
		u64 attr_version;

		/* For negative dentries, always do a fresh lookup */
		if (!inode)
			goto invalid;

		/* Under LOOKUP_RCU return -ECHILD rather than send a request */
		ret = -ECHILD;
		if (flags & LOOKUP_RCU)
			goto out;

		fc = get_fuse_conn(inode);

		forget = fuse_alloc_forget();
		ret = -ENOMEM;
		if (!forget)
			goto out;

		/* Sampled before the request is sent */
		attr_version = fuse_get_attr_version(fc);

		/* Redo the lookup of this name in the parent directory */
		parent = dget_parent(entry);
		fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
				 &entry->d_name, &outarg);
		ret = fuse_simple_request(fc, &args);
		dput(parent);
		/* Zero nodeid is same as -ENOENT */
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
			fi = get_fuse_inode(inode);
			if (outarg.nodeid != get_node_id(inode)) {
				/*
				 * The name now refers to a different node: the
				 * forget link is passed to fuse_queue_forget()
				 * for the reply's nodeid (and is not freed
				 * here), and the dentry is reported invalid.
				 */
				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
				goto invalid;
			}
			/*
			 * Same node: bump nlookup under fc->lock.
			 * NOTE(review): presumably this accounts for the
			 * reference the successful LOOKUP took - confirm.
			 */
			spin_lock(&fc->lock);
			fi->nlookup++;
			spin_unlock(&fc->lock);
		}
		kfree(forget);
		/* -ENOMEM from the request is returned as is */
		if (ret == -ENOMEM)
			goto out;
		/* Any other error, or a changed file type, invalidates */
		if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
			goto invalid;

		fuse_change_attributes(inode, &outarg.attr,
				       entry_attr_timeout(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
	} else if (inode) {
		/* Entry still within its timeout and positive */
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			/* The bit is only tested here, not cleared */
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
			/* Pass the hint on to the parent directory */
			parent = dget_parent(entry);
			fuse_advise_use_readdirplus(d_inode(parent));
			dput(parent);
		}
	}
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
}
269
270static int invalid_nodeid(u64 nodeid)
271{
272	return !nodeid || nodeid == FUSE_ROOT_ID;
273}
274
/*
 * Dentry operations table; ->d_revalidate is the only hook set here.
 */
const struct dentry_operations fuse_dentry_operations = {
	.d_revalidate	= fuse_dentry_revalidate,
};
278
/*
 * Returns 1 if the file type encoded in mode @m is a regular file,
 * directory, symlink, character or block device, FIFO or socket, and
 * 0 otherwise.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	return S_ISFIFO(m) || S_ISSOCK(m);
}
284
285int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
286		     struct fuse_entry_out *outarg, struct inode **inode)
287{
288	struct fuse_conn *fc = get_fuse_conn_super(sb);
289	FUSE_ARGS(args);
290	struct fuse_forget_link *forget;
291	u64 attr_version;
292	int err;
293
294	*inode = NULL;
295	err = -ENAMETOOLONG;
296	if (name->len > FUSE_NAME_MAX)
297		goto out;
298
299
300	forget = fuse_alloc_forget();
301	err = -ENOMEM;
302	if (!forget)
303		goto out;
304
305	attr_version = fuse_get_attr_version(fc);
306
307	fuse_lookup_init(fc, &args, nodeid, name, outarg);
308	err = fuse_simple_request(fc, &args);
309	/* Zero nodeid is same as -ENOENT, but with valid timeout */
310	if (err || !outarg->nodeid)
311		goto out_put_forget;
312
313	err = -EIO;
314	if (!outarg->nodeid)
315		goto out_put_forget;
316	if (!fuse_valid_type(outarg->attr.mode))
317		goto out_put_forget;
318
319	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
320			   &outarg->attr, entry_attr_timeout(outarg),
321			   attr_version);
322	err = -ENOMEM;
323	if (!*inode) {
324		fuse_queue_forget(fc, forget, outarg->nodeid, 1);
325		goto out;
326	}
327	err = 0;
328
329 out_put_forget:
330	kfree(forget);
331 out:
332	return err;
333}
334
335static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
336				  unsigned int flags)
337{
338	int err;
339	struct fuse_entry_out outarg;
340	struct inode *inode;
341	struct dentry *newent;
342	bool outarg_valid = true;
343
344	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
345			       &outarg, &inode);
346	if (err == -ENOENT) {
347		outarg_valid = false;
348		err = 0;
349	}
350	if (err)
351		goto out_err;
352
353	err = -EIO;
354	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
355		goto out_iput;
356
357	newent = d_splice_alias(inode, entry);
358	err = PTR_ERR(newent);
359	if (IS_ERR(newent))
360		goto out_err;
361
362	entry = newent ? newent : entry;
363	if (outarg_valid)
364		fuse_change_entry_timeout(entry, &outarg);
365	else
366		fuse_invalidate_entry_cache(entry);
367
368	fuse_advise_use_readdirplus(dir);
369	return newent;
370
371 out_iput:
372	iput(inode);
373 out_err:
374	return ERR_PTR(err);
375}
376
/*
 * Atomic create+open operation
 *
 * If the filesystem doesn't support this, then fall back to separate
 * 'mknod' + 'open' requests.
 *
 * Sends one FUSE_CREATE request carrying the open flags, mode and umask
 * plus the entry name, and receives an entry reply and an open reply.
 * Returns 0 on success or a negative errno.  The fallback is not done
 * in this function: the error from the request is simply returned to
 * the caller.
 */
static int fuse_create_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode, int *opened)
{
	int err;
	struct inode *inode;
	struct fuse_conn *fc = get_fuse_conn(dir);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	struct fuse_create_in inarg;
	struct fuse_open_out outopen;
	struct fuse_entry_out outentry;
	struct fuse_file *ff;

	/* Userspace expects S_IFREG in create mode */
	BUG_ON((mode & S_IFMT) != S_IFREG);

	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out_err;

	err = -ENOMEM;
	ff = fuse_file_alloc(fc);
	if (!ff)
		goto out_put_forget_req;

	/* Apply the umask here unless the connection has dont_mask set */
	if (!fc->dont_mask)
		mode &= ~current_umask();

	/* O_NOCTTY is stripped before the flags are sent */
	flags &= ~O_NOCTTY;
	memset(&inarg, 0, sizeof(inarg));
	memset(&outentry, 0, sizeof(outentry));
	inarg.flags = flags;
	inarg.mode = mode;
	inarg.umask = current_umask();
	args.in.h.opcode = FUSE_CREATE;
	args.in.h.nodeid = get_node_id(dir);
	args.in.numargs = 2;
	args.in.args[0].size = sizeof(inarg);
	args.in.args[0].value = &inarg;
	args.in.args[1].size = entry->d_name.len + 1;
	args.in.args[1].value = entry->d_name.name;
	args.out.numargs = 2;
	args.out.args[0].size = sizeof(outentry);
	args.out.args[0].value = &outentry;
	args.out.args[1].size = sizeof(outopen);
	args.out.args[1].value = &outopen;
	err = fuse_simple_request(fc, &args);
	if (err)
		goto out_free_ff;

	/* The reply must describe a regular file with an acceptable nodeid */
	err = -EIO;
	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
		goto out_free_ff;

	ff->fh = outopen.fh;
	ff->nodeid = outentry.nodeid;
	ff->open_flags = outopen.open_flags;
	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
			  &outentry.attr, entry_attr_timeout(&outentry), 0);
	if (!inode) {
		/*
		 * No inode: release the file with the create-time flags
		 * removed and queue a forget for the node.  Both ff and
		 * forget are passed on here, so the cleanup labels that
		 * would free them are jumped over.
		 */
		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
		fuse_sync_release(ff, flags);
		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
		err = -ENOMEM;
		goto out_err;
	}
	kfree(forget);
	d_instantiate(entry, inode);
	fuse_change_entry_timeout(entry, &outentry);
	/* The directory gained an entry, so its attributes are stale */
	fuse_invalidate_attr(dir);
	err = finish_open(file, entry, generic_file_open, opened);
	if (err) {
		/* finish_open() failed: release the file again */
		fuse_sync_release(ff, flags);
	} else {
		file->private_data = fuse_file_get(ff);
		fuse_finish_open(inode, file);
	}
	return err;

out_free_ff:
	fuse_file_free(ff);
out_put_forget_req:
	kfree(forget);
out_err:
	return err;
}
471
472static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
473static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
474			    struct file *file, unsigned flags,
475			    umode_t mode, int *opened)
476{
477	int err;
478	struct fuse_conn *fc = get_fuse_conn(dir);
479	struct dentry *res = NULL;
480
481	if (d_unhashed(entry)) {
482		res = fuse_lookup(dir, entry, 0);
483		if (IS_ERR(res))
484			return PTR_ERR(res);
485
486		if (res)
487			entry = res;
488	}
489
490	if (!(flags & O_CREAT) || d_really_is_positive(entry))
491		goto no_open;
492
493	/* Only creates */
494	*opened |= FILE_CREATED;
495
496	if (fc->no_create)
497		goto mknod;
498
499	err = fuse_create_open(dir, entry, file, flags, mode, opened);
500	if (err == -ENOSYS) {
501		fc->no_create = 1;
502		goto mknod;
503	}
504out_dput:
505	dput(res);
506	return err;
507
508mknod:
509	err = fuse_mknod(dir, entry, mode, 0);
510	if (err)
511		goto out_dput;
512no_open:
513	return finish_no_open(file, res);
514}
515
516/*
517 * Code shared between mknod, mkdir, symlink and link
518 */
519static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
520			    struct inode *dir, struct dentry *entry,
521			    umode_t mode)
522{
523	struct fuse_entry_out outarg;
524	struct inode *inode;
525	int err;
526	struct fuse_forget_link *forget;
527
528	forget = fuse_alloc_forget();
529	if (!forget)
530		return -ENOMEM;
531
532	memset(&outarg, 0, sizeof(outarg));
533	args->in.h.nodeid = get_node_id(dir);
534	args->out.numargs = 1;
535	args->out.args[0].size = sizeof(outarg);
536	args->out.args[0].value = &outarg;
537	err = fuse_simple_request(fc, args);
538	if (err)
539		goto out_put_forget_req;
540
541	err = -EIO;
542	if (invalid_nodeid(outarg.nodeid))
543		goto out_put_forget_req;
544
545	if ((outarg.attr.mode ^ mode) & S_IFMT)
546		goto out_put_forget_req;
547
548	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
549			  &outarg.attr, entry_attr_timeout(&outarg), 0);
550	if (!inode) {
551		fuse_queue_forget(fc, forget, outarg.nodeid, 1);
552		return -ENOMEM;
553	}
554	kfree(forget);
555
556	err = d_instantiate_no_diralias(entry, inode);
557	if (err)
558		return err;
559
560	fuse_change_entry_timeout(entry, &outarg);
561	fuse_invalidate_attr(dir);
562	return 0;
563
564 out_put_forget_req:
565	kfree(forget);
566	return err;
567}
568
569static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
570		      dev_t rdev)
571{
572	struct fuse_mknod_in inarg;
573	struct fuse_conn *fc = get_fuse_conn(dir);
574	FUSE_ARGS(args);
575
576	if (!fc->dont_mask)
577		mode &= ~current_umask();
578
579	memset(&inarg, 0, sizeof(inarg));
580	inarg.mode = mode;
581	inarg.rdev = new_encode_dev(rdev);
582	inarg.umask = current_umask();
583	args.in.h.opcode = FUSE_MKNOD;
584	args.in.numargs = 2;
585	args.in.args[0].size = sizeof(inarg);
586	args.in.args[0].value = &inarg;
587	args.in.args[1].size = entry->d_name.len + 1;
588	args.in.args[1].value = entry->d_name.name;
589	return create_new_entry(fc, &args, dir, entry, mode);
590}
591
592static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
593		       bool excl)
594{
595	return fuse_mknod(dir, entry, mode, 0);
596}
597
598static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
599{
600	struct fuse_mkdir_in inarg;
601	struct fuse_conn *fc = get_fuse_conn(dir);
602	FUSE_ARGS(args);
603
604	if (!fc->dont_mask)
605		mode &= ~current_umask();
606
607	memset(&inarg, 0, sizeof(inarg));
608	inarg.mode = mode;
609	inarg.umask = current_umask();
610	args.in.h.opcode = FUSE_MKDIR;
611	args.in.numargs = 2;
612	args.in.args[0].size = sizeof(inarg);
613	args.in.args[0].value = &inarg;
614	args.in.args[1].size = entry->d_name.len + 1;
615	args.in.args[1].value = entry->d_name.name;
616	return create_new_entry(fc, &args, dir, entry, S_IFDIR);
617}
618
619static int fuse_symlink(struct inode *dir, struct dentry *entry,
620			const char *link)
621{
622	struct fuse_conn *fc = get_fuse_conn(dir);
623	unsigned len = strlen(link) + 1;
624	FUSE_ARGS(args);
625
626	args.in.h.opcode = FUSE_SYMLINK;
627	args.in.numargs = 2;
628	args.in.args[0].size = entry->d_name.len + 1;
629	args.in.args[0].value = entry->d_name.name;
630	args.in.args[1].size = len;
631	args.in.args[1].value = link;
632	return create_new_entry(fc, &args, dir, entry, S_IFLNK);
633}
634
635static inline void fuse_update_ctime(struct inode *inode)
636{
637	if (!IS_NOCMTIME(inode)) {
638		inode->i_ctime = current_fs_time(inode->i_sb);
639		mark_inode_dirty_sync(inode);
640	}
641}
642
643static int fuse_unlink(struct inode *dir, struct dentry *entry)
644{
645	int err;
646	struct fuse_conn *fc = get_fuse_conn(dir);
647	FUSE_ARGS(args);
648
649	args.in.h.opcode = FUSE_UNLINK;
650	args.in.h.nodeid = get_node_id(dir);
651	args.in.numargs = 1;
652	args.in.args[0].size = entry->d_name.len + 1;
653	args.in.args[0].value = entry->d_name.name;
654	err = fuse_simple_request(fc, &args);
655	if (!err) {
656		struct inode *inode = d_inode(entry);
657		struct fuse_inode *fi = get_fuse_inode(inode);
658
659		spin_lock(&fc->lock);
660		fi->attr_version = ++fc->attr_version;
661		/*
662		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
663		 * happen if userspace filesystem is careless.  It would be
664		 * difficult to enforce correct nlink usage so just ignore this
665		 * condition here
666		 */
667		if (inode->i_nlink > 0)
668			drop_nlink(inode);
669		spin_unlock(&fc->lock);
670		fuse_invalidate_attr(inode);
671		fuse_invalidate_attr(dir);
672		fuse_invalidate_entry_cache(entry);
673		fuse_update_ctime(inode);
674	} else if (err == -EINTR)
675		fuse_invalidate_entry(entry);
676	return err;
677}
678
679static int fuse_rmdir(struct inode *dir, struct dentry *entry)
680{
681	int err;
682	struct fuse_conn *fc = get_fuse_conn(dir);
683	FUSE_ARGS(args);
684
685	args.in.h.opcode = FUSE_RMDIR;
686	args.in.h.nodeid = get_node_id(dir);
687	args.in.numargs = 1;
688	args.in.args[0].size = entry->d_name.len + 1;
689	args.in.args[0].value = entry->d_name.name;
690	err = fuse_simple_request(fc, &args);
691	if (!err) {
692		clear_nlink(d_inode(entry));
693		fuse_invalidate_attr(dir);
694		fuse_invalidate_entry_cache(entry);
695	} else if (err == -EINTR)
696		fuse_invalidate_entry(entry);
697	return err;
698}
699
700static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
701			      struct inode *newdir, struct dentry *newent,
702			      unsigned int flags, int opcode, size_t argsize)
703{
704	int err;
705	struct fuse_rename2_in inarg;
706	struct fuse_conn *fc = get_fuse_conn(olddir);
707	FUSE_ARGS(args);
708
709	memset(&inarg, 0, argsize);
710	inarg.newdir = get_node_id(newdir);
711	inarg.flags = flags;
712	args.in.h.opcode = opcode;
713	args.in.h.nodeid = get_node_id(olddir);
714	args.in.numargs = 3;
715	args.in.args[0].size = argsize;
716	args.in.args[0].value = &inarg;
717	args.in.args[1].size = oldent->d_name.len + 1;
718	args.in.args[1].value = oldent->d_name.name;
719	args.in.args[2].size = newent->d_name.len + 1;
720	args.in.args[2].value = newent->d_name.name;
721	err = fuse_simple_request(fc, &args);
722	if (!err) {
723		/* ctime changes */
724		fuse_invalidate_attr(d_inode(oldent));
725		fuse_update_ctime(d_inode(oldent));
726
727		if (flags & RENAME_EXCHANGE) {
728			fuse_invalidate_attr(d_inode(newent));
729			fuse_update_ctime(d_inode(newent));
730		}
731
732		fuse_invalidate_attr(olddir);
733		if (olddir != newdir)
734			fuse_invalidate_attr(newdir);
735
736		/* newent will end up negative */
737		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
738			fuse_invalidate_attr(d_inode(newent));
739			fuse_invalidate_entry_cache(newent);
740			fuse_update_ctime(d_inode(newent));
741		}
742	} else if (err == -EINTR) {
743		/* If request was interrupted, DEITY only knows if the
744		   rename actually took place.  If the invalidation
745		   fails (e.g. some process has CWD under the renamed
746		   directory), then there can be inconsistency between
747		   the dcache and the real filesystem.  Tough luck. */
748		fuse_invalidate_entry(oldent);
749		if (d_really_is_positive(newent))
750			fuse_invalidate_entry(newent);
751	}
752
753	return err;
754}
755
756static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
757			struct inode *newdir, struct dentry *newent,
758			unsigned int flags)
759{
760	struct fuse_conn *fc = get_fuse_conn(olddir);
761	int err;
762
763	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
764		return -EINVAL;
765
766	if (flags) {
767		if (fc->no_rename2 || fc->minor < 23)
768			return -EINVAL;
769
770		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
771					 FUSE_RENAME2,
772					 sizeof(struct fuse_rename2_in));
773		if (err == -ENOSYS) {
774			fc->no_rename2 = 1;
775			err = -EINVAL;
776		}
777	} else {
778		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
779					 FUSE_RENAME,
780					 sizeof(struct fuse_rename_in));
781	}
782
783	return err;
784}
785
786static int fuse_link(struct dentry *entry, struct inode *newdir,
787		     struct dentry *newent)
788{
789	int err;
790	struct fuse_link_in inarg;
791	struct inode *inode = d_inode(entry);
792	struct fuse_conn *fc = get_fuse_conn(inode);
793	FUSE_ARGS(args);
794
795	memset(&inarg, 0, sizeof(inarg));
796	inarg.oldnodeid = get_node_id(inode);
797	args.in.h.opcode = FUSE_LINK;
798	args.in.numargs = 2;
799	args.in.args[0].size = sizeof(inarg);
800	args.in.args[0].value = &inarg;
801	args.in.args[1].size = newent->d_name.len + 1;
802	args.in.args[1].value = newent->d_name.name;
803	err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
804	/* Contrary to "normal" filesystems it can happen that link
805	   makes two "logical" inodes point to the same "physical"
806	   inode.  We invalidate the attributes of the old one, so it
807	   will reflect changes in the backing inode (link count,
808	   etc.)
809	*/
810	if (!err) {
811		struct fuse_inode *fi = get_fuse_inode(inode);
812
813		spin_lock(&fc->lock);
814		fi->attr_version = ++fc->attr_version;
815		inc_nlink(inode);
816		spin_unlock(&fc->lock);
817		fuse_invalidate_attr(inode);
818		fuse_update_ctime(inode);
819	} else if (err == -EINTR) {
820		fuse_invalidate_attr(inode);
821	}
822	return err;
823}
824
825static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
826			  struct kstat *stat)
827{
828	unsigned int blkbits;
829	struct fuse_conn *fc = get_fuse_conn(inode);
830
831	/* see the comment in fuse_change_attributes() */
832	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
833		attr->size = i_size_read(inode);
834		attr->mtime = inode->i_mtime.tv_sec;
835		attr->mtimensec = inode->i_mtime.tv_nsec;
836		attr->ctime = inode->i_ctime.tv_sec;
837		attr->ctimensec = inode->i_ctime.tv_nsec;
838	}
839
840	stat->dev = inode->i_sb->s_dev;
841	stat->ino = attr->ino;
842	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
843	stat->nlink = attr->nlink;
844	stat->uid = make_kuid(&init_user_ns, attr->uid);
845	stat->gid = make_kgid(&init_user_ns, attr->gid);
846	stat->rdev = inode->i_rdev;
847	stat->atime.tv_sec = attr->atime;
848	stat->atime.tv_nsec = attr->atimensec;
849	stat->mtime.tv_sec = attr->mtime;
850	stat->mtime.tv_nsec = attr->mtimensec;
851	stat->ctime.tv_sec = attr->ctime;
852	stat->ctime.tv_nsec = attr->ctimensec;
853	stat->size = attr->size;
854	stat->blocks = attr->blocks;
855
856	if (attr->blksize != 0)
857		blkbits = ilog2(attr->blksize);
858	else
859		blkbits = inode->i_sb->s_blocksize_bits;
860
861	stat->blksize = 1 << blkbits;
862}
863
864static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
865			   struct file *file)
866{
867	int err;
868	struct fuse_getattr_in inarg;
869	struct fuse_attr_out outarg;
870	struct fuse_conn *fc = get_fuse_conn(inode);
871	FUSE_ARGS(args);
872	u64 attr_version;
873
874	attr_version = fuse_get_attr_version(fc);
875
876	memset(&inarg, 0, sizeof(inarg));
877	memset(&outarg, 0, sizeof(outarg));
878	/* Directories have separate file-handle space */
879	if (file && S_ISREG(inode->i_mode)) {
880		struct fuse_file *ff = file->private_data;
881
882		inarg.getattr_flags |= FUSE_GETATTR_FH;
883		inarg.fh = ff->fh;
884	}
885	args.in.h.opcode = FUSE_GETATTR;
886	args.in.h.nodeid = get_node_id(inode);
887	args.in.numargs = 1;
888	args.in.args[0].size = sizeof(inarg);
889	args.in.args[0].value = &inarg;
890	args.out.numargs = 1;
891	args.out.args[0].size = sizeof(outarg);
892	args.out.args[0].value = &outarg;
893	err = fuse_simple_request(fc, &args);
894	if (!err) {
895		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
896			make_bad_inode(inode);
897			err = -EIO;
898		} else {
899			fuse_change_attributes(inode, &outarg.attr,
900					       attr_timeout(&outarg),
901					       attr_version);
902			if (stat)
903				fuse_fillattr(inode, &outarg.attr, stat);
904		}
905	}
906	return err;
907}
908
909int fuse_update_attributes(struct inode *inode, struct kstat *stat,
910			   struct file *file, bool *refreshed)
911{
912	struct fuse_inode *fi = get_fuse_inode(inode);
913	int err;
914	bool r;
915
916	if (time_before64(fi->i_time, get_jiffies_64())) {
917		r = true;
918		err = fuse_do_getattr(inode, stat, file);
919	} else {
920		r = false;
921		err = 0;
922		if (stat) {
923			generic_fillattr(inode, stat);
924			stat->mode = fi->orig_i_mode;
925			stat->ino = fi->orig_ino;
926		}
927	}
928
929	if (refreshed != NULL)
930		*refreshed = r;
931
932	return err;
933}
934
/*
 * Invalidate the dentry called @name in the directory whose node id is
 * @parent_nodeid.
 *
 * The parent's attributes and the entry are always invalidated once the
 * entry has been found.  If @child_nodeid is non-zero and the entry is
 * positive, the entry is additionally deleted from the dcache, provided
 * that it refers to @child_nodeid, is not a mountpoint and, if it is a
 * directory, is empty.
 *
 * Returns 0 on success, -ENOENT if the parent inode, an alias of it or
 * the entry cannot be found or the entry's node id differs from
 * @child_nodeid, -ENOTDIR if the parent is not a directory, -EBUSY if
 * the entry is a mountpoint and -ENOTEMPTY if it is a non-empty
 * directory.
 */
int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
			     u64 child_nodeid, struct qstr *name)
{
	int err = -ENOTDIR;
	struct inode *parent;
	struct dentry *dir;
	struct dentry *entry;

	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
	if (!parent)
		return -ENOENT;

	/* The parent's i_mutex is held until the "unlock" label */
	mutex_lock(&parent->i_mutex);
	if (!S_ISDIR(parent->i_mode))
		goto unlock;

	err = -ENOENT;
	dir = d_find_alias(parent);
	if (!dir)
		goto unlock;

	entry = d_lookup(dir, name);
	dput(dir);
	if (!entry)
		goto unlock;

	fuse_invalidate_attr(parent);
	fuse_invalidate_entry(entry);

	if (child_nodeid != 0 && d_really_is_positive(entry)) {
		/* The child's i_mutex is taken inside the parent's */
		mutex_lock(&d_inode(entry)->i_mutex);
		if (get_node_id(d_inode(entry)) != child_nodeid) {
			err = -ENOENT;
			goto badentry;
		}
		if (d_mountpoint(entry)) {
			err = -EBUSY;
			goto badentry;
		}
		if (d_is_dir(entry)) {
			/* Prune cached children before the emptiness test */
			shrink_dcache_parent(entry);
			if (!simple_empty(entry)) {
				err = -ENOTEMPTY;
				goto badentry;
			}
			d_inode(entry)->i_flags |= S_DEAD;
		}
		dont_mount(entry);
		clear_nlink(d_inode(entry));
		err = 0;
 badentry:
		mutex_unlock(&d_inode(entry)->i_mutex);
		/* d_delete() is called after the child's mutex is dropped */
		if (!err)
			d_delete(entry);
	} else {
		err = 0;
	}
	dput(entry);

 unlock:
	mutex_unlock(&parent->i_mutex);
	iput(parent);
	return err;
}
999
1000/*
1001 * Calling into a user-controlled filesystem gives the filesystem
1002 * daemon ptrace-like capabilities over the current process.  This
1003 * means, that the filesystem daemon is able to record the exact
1004 * filesystem operations performed, and can also control the behavior
1005 * of the requester process in otherwise impossible ways.  For example
1006 * it can delay the operation for arbitrary length of time allowing
1007 * DoS against the requester.
1008 *
1009 * For this reason only those processes can call into the filesystem,
1010 * for which the owner of the mount has ptrace privilege.  This
1011 * excludes processes started by other users, suid or sgid processes.
1012 */
1013int fuse_allow_current_process(struct fuse_conn *fc)
1014{
1015	const struct cred *cred;
1016
1017	if (fc->flags & FUSE_ALLOW_OTHER)
1018		return 1;
1019
1020	cred = current_cred();
1021	if (uid_eq(cred->euid, fc->user_id) &&
1022	    uid_eq(cred->suid, fc->user_id) &&
1023	    uid_eq(cred->uid,  fc->user_id) &&
1024	    gid_eq(cred->egid, fc->group_id) &&
1025	    gid_eq(cred->sgid, fc->group_id) &&
1026	    gid_eq(cred->gid,  fc->group_id))
1027		return 1;
1028
1029	return 0;
1030}
1031
1032static int fuse_access(struct inode *inode, int mask)
1033{
1034	struct fuse_conn *fc = get_fuse_conn(inode);
1035	FUSE_ARGS(args);
1036	struct fuse_access_in inarg;
1037	int err;
1038
1039	BUG_ON(mask & MAY_NOT_BLOCK);
1040
1041	if (fc->no_access)
1042		return 0;
1043
1044	memset(&inarg, 0, sizeof(inarg));
1045	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1046	args.in.h.opcode = FUSE_ACCESS;
1047	args.in.h.nodeid = get_node_id(inode);
1048	args.in.numargs = 1;
1049	args.in.args[0].size = sizeof(inarg);
1050	args.in.args[0].value = &inarg;
1051	err = fuse_simple_request(fc, &args);
1052	if (err == -ENOSYS) {
1053		fc->no_access = 1;
1054		err = 0;
1055	}
1056	return err;
1057}
1058
1059static int fuse_perm_getattr(struct inode *inode, int mask)
1060{
1061	if (mask & MAY_NOT_BLOCK)
1062		return -ECHILD;
1063
1064	return fuse_do_getattr(inode, NULL, NULL);
1065}
1066
1067/*
1068 * Check permission.  The two basic access models of FUSE are:
1069 *
1070 * 1) Local access checking ('default_permissions' mount option) based
1071 * on file mode.  This is the plain old disk filesystem permission
 * model.
1073 *
1074 * 2) "Remote" access checking, where server is responsible for
1075 * checking permission in each inode operation.  An exception to this
1076 * is if ->permission() was invoked from sys_access() in which case an
1077 * access request is sent.  Execute permission is still checked
1078 * locally based on file mode.
1079 */
static int fuse_permission(struct inode *inode, int mask)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	/* Set once an attribute refresh has been attempted in this call */
	bool refreshed = false;
	int err = 0;

	if (!fuse_allow_current_process(fc))
		return -EACCES;

	/*
	 * If attributes are needed, refresh them before proceeding
	 */
	if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		/* Only refresh when the attribute timeout has passed */
		if (time_before64(fi->i_time, get_jiffies_64())) {
			refreshed = true;

			err = fuse_perm_getattr(inode, mask);
			if (err)
				return err;
		}
	}

	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
		/* Local checking based on the cached mode */
		err = generic_permission(inode, mask);

		/* If permission is denied, try to refresh file
		   attributes.  This is also needed, because the root
		   node will at first have no permissions */
		if (err == -EACCES && !refreshed) {
			err = fuse_perm_getattr(inode, mask);
			if (!err)
				err = generic_permission(inode, mask);
		}

		/* Note: the opposite of the above test does not
		   exist.  So if permissions are revoked this won't be
		   noticed immediately, only after the attribute
		   timeout has expired */
	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
		/* Remote checking: send an access request */
		err = fuse_access(inode, mask);
	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
		/* Execute permission on regular files is checked locally */
		if (!(inode->i_mode & S_IXUGO)) {
			/* Mode was just refreshed above: deny right away */
			if (refreshed)
				return -EACCES;

			/* Otherwise refresh once and look at the mode again */
			err = fuse_perm_getattr(inode, mask);
			if (!err && !(inode->i_mode & S_IXUGO))
				return -EACCES;
		}
	}
	return err;
}
1135
1136static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1137			 struct dir_context *ctx)
1138{
1139	while (nbytes >= FUSE_NAME_OFFSET) {
1140		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
1141		size_t reclen = FUSE_DIRENT_SIZE(dirent);
1142		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1143			return -EIO;
1144		if (reclen > nbytes)
1145			break;
1146		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1147			return -EIO;
1148
1149		if (!dir_emit(ctx, dirent->name, dirent->namelen,
1150			       dirent->ino, dirent->type))
1151			break;
1152
1153		buf += reclen;
1154		nbytes -= reclen;
1155		ctx->pos = dirent->off;
1156	}
1157
1158	return 0;
1159}
1160
1161static int fuse_direntplus_link(struct file *file,
1162				struct fuse_direntplus *direntplus,
1163				u64 attr_version)
1164{
1165	int err;
1166	struct fuse_entry_out *o = &direntplus->entry_out;
1167	struct fuse_dirent *dirent = &direntplus->dirent;
1168	struct dentry *parent = file->f_path.dentry;
1169	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1170	struct dentry *dentry;
1171	struct dentry *alias;
1172	struct inode *dir = d_inode(parent);
1173	struct fuse_conn *fc;
1174	struct inode *inode;
1175
1176	if (!o->nodeid) {
1177		/*
1178		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1179		 * ENOENT. Instead, it only means the userspace filesystem did
1180		 * not want to return attributes/handle for this entry.
1181		 *
1182		 * So do nothing.
1183		 */
1184		return 0;
1185	}
1186
1187	if (name.name[0] == '.') {
1188		/*
1189		 * We could potentially refresh the attributes of the directory
1190		 * and its parent?
1191		 */
1192		if (name.len == 1)
1193			return 0;
1194		if (name.name[1] == '.' && name.len == 2)
1195			return 0;
1196	}
1197
1198	if (invalid_nodeid(o->nodeid))
1199		return -EIO;
1200	if (!fuse_valid_type(o->attr.mode))
1201		return -EIO;
1202
1203	fc = get_fuse_conn(dir);
1204
1205	name.hash = full_name_hash(name.name, name.len);
1206	dentry = d_lookup(parent, &name);
1207	if (dentry) {
1208		inode = d_inode(dentry);
1209		if (!inode) {
1210			d_drop(dentry);
1211		} else if (get_node_id(inode) != o->nodeid ||
1212			   ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
1213			d_invalidate(dentry);
1214		} else if (is_bad_inode(inode)) {
1215			err = -EIO;
1216			goto out;
1217		} else {
1218			struct fuse_inode *fi;
1219			fi = get_fuse_inode(inode);
1220			spin_lock(&fc->lock);
1221			fi->nlookup++;
1222			spin_unlock(&fc->lock);
1223
1224			fuse_change_attributes(inode, &o->attr,
1225					       entry_attr_timeout(o),
1226					       attr_version);
1227
1228			/*
1229			 * The other branch to 'found' comes via fuse_iget()
1230			 * which bumps nlookup inside
1231			 */
1232			goto found;
1233		}
1234		dput(dentry);
1235	}
1236
1237	dentry = d_alloc(parent, &name);
1238	err = -ENOMEM;
1239	if (!dentry)
1240		goto out;
1241
1242	inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1243			  &o->attr, entry_attr_timeout(o), attr_version);
1244	if (!inode)
1245		goto out;
1246
1247	alias = d_splice_alias(inode, dentry);
1248	err = PTR_ERR(alias);
1249	if (IS_ERR(alias))
1250		goto out;
1251
1252	if (alias) {
1253		dput(dentry);
1254		dentry = alias;
1255	}
1256
1257found:
1258	if (fc->readdirplus_auto)
1259		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
1260	fuse_change_entry_timeout(dentry, o);
1261
1262	err = 0;
1263out:
1264	dput(dentry);
1265	return err;
1266}
1267
1268static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1269			     struct dir_context *ctx, u64 attr_version)
1270{
1271	struct fuse_direntplus *direntplus;
1272	struct fuse_dirent *dirent;
1273	size_t reclen;
1274	int over = 0;
1275	int ret;
1276
1277	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1278		direntplus = (struct fuse_direntplus *) buf;
1279		dirent = &direntplus->dirent;
1280		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1281
1282		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1283			return -EIO;
1284		if (reclen > nbytes)
1285			break;
1286		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1287			return -EIO;
1288
1289		if (!over) {
1290			/* We fill entries into dstbuf only as much as
1291			   it can hold. But we still continue iterating
1292			   over remaining entries to link them. If not,
1293			   we need to send a FORGET for each of those
1294			   which we did not link.
1295			*/
1296			over = !dir_emit(ctx, dirent->name, dirent->namelen,
1297				       dirent->ino, dirent->type);
1298			ctx->pos = dirent->off;
1299		}
1300
1301		buf += reclen;
1302		nbytes -= reclen;
1303
1304		ret = fuse_direntplus_link(file, direntplus, attr_version);
1305		if (ret)
1306			fuse_force_forget(file, direntplus->entry_out.nodeid);
1307	}
1308
1309	return 0;
1310}
1311
1312static int fuse_readdir(struct file *file, struct dir_context *ctx)
1313{
1314	int plus, err;
1315	size_t nbytes;
1316	struct page *page;
1317	struct inode *inode = file_inode(file);
1318	struct fuse_conn *fc = get_fuse_conn(inode);
1319	struct fuse_req *req;
1320	u64 attr_version = 0;
1321
1322	if (is_bad_inode(inode))
1323		return -EIO;
1324
1325	req = fuse_get_req(fc, 1);
1326	if (IS_ERR(req))
1327		return PTR_ERR(req);
1328
1329	page = alloc_page(GFP_KERNEL);
1330	if (!page) {
1331		fuse_put_request(fc, req);
1332		return -ENOMEM;
1333	}
1334
1335	plus = fuse_use_readdirplus(inode, ctx);
1336	req->out.argpages = 1;
1337	req->num_pages = 1;
1338	req->pages[0] = page;
1339	req->page_descs[0].length = PAGE_SIZE;
1340	if (plus) {
1341		attr_version = fuse_get_attr_version(fc);
1342		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1343			       FUSE_READDIRPLUS);
1344	} else {
1345		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1346			       FUSE_READDIR);
1347	}
1348	fuse_request_send(fc, req);
1349	nbytes = req->out.args[0].size;
1350	err = req->out.h.error;
1351	fuse_put_request(fc, req);
1352	if (!err) {
1353		if (plus) {
1354			err = parse_dirplusfile(page_address(page), nbytes,
1355						file, ctx,
1356						attr_version);
1357		} else {
1358			err = parse_dirfile(page_address(page), nbytes, file,
1359					    ctx);
1360		}
1361	}
1362
1363	__free_page(page);
1364	fuse_invalidate_atime(inode);
1365	return err;
1366}
1367
1368static const char *fuse_follow_link(struct dentry *dentry, void **cookie)
1369{
1370	struct inode *inode = d_inode(dentry);
1371	struct fuse_conn *fc = get_fuse_conn(inode);
1372	FUSE_ARGS(args);
1373	char *link;
1374	ssize_t ret;
1375
1376	link = (char *) __get_free_page(GFP_KERNEL);
1377	if (!link)
1378		return ERR_PTR(-ENOMEM);
1379
1380	args.in.h.opcode = FUSE_READLINK;
1381	args.in.h.nodeid = get_node_id(inode);
1382	args.out.argvar = 1;
1383	args.out.numargs = 1;
1384	args.out.args[0].size = PAGE_SIZE - 1;
1385	args.out.args[0].value = link;
1386	ret = fuse_simple_request(fc, &args);
1387	if (ret < 0) {
1388		free_page((unsigned long) link);
1389		link = ERR_PTR(ret);
1390	} else {
1391		link[ret] = '\0';
1392		*cookie = link;
1393	}
1394	fuse_invalidate_atime(inode);
1395	return link;
1396}
1397
1398static int fuse_dir_open(struct inode *inode, struct file *file)
1399{
1400	return fuse_open_common(inode, file, true);
1401}
1402
1403static int fuse_dir_release(struct inode *inode, struct file *file)
1404{
1405	fuse_release_common(file, FUSE_RELEASEDIR);
1406
1407	return 0;
1408}
1409
1410static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1411			  int datasync)
1412{
1413	return fuse_fsync_common(file, start, end, datasync, 1);
1414}
1415
1416static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1417			    unsigned long arg)
1418{
1419	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1420
1421	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1422	if (fc->minor < 18)
1423		return -ENOTTY;
1424
1425	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1426}
1427
1428static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1429				   unsigned long arg)
1430{
1431	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1432
1433	if (fc->minor < 18)
1434		return -ENOTTY;
1435
1436	return fuse_ioctl_common(file, cmd, arg,
1437				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1438}
1439
1440static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1441{
1442	/* Always update if mtime is explicitly set  */
1443	if (ivalid & ATTR_MTIME_SET)
1444		return true;
1445
1446	/* Or if kernel i_mtime is the official one */
1447	if (trust_local_mtime)
1448		return true;
1449
1450	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1451	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1452		return false;
1453
1454	/* In all other cases update */
1455	return true;
1456}
1457
1458static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
1459			   bool trust_local_cmtime)
1460{
1461	unsigned ivalid = iattr->ia_valid;
1462
1463	if (ivalid & ATTR_MODE)
1464		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1465	if (ivalid & ATTR_UID)
1466		arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
1467	if (ivalid & ATTR_GID)
1468		arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
1469	if (ivalid & ATTR_SIZE)
1470		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1471	if (ivalid & ATTR_ATIME) {
1472		arg->valid |= FATTR_ATIME;
1473		arg->atime = iattr->ia_atime.tv_sec;
1474		arg->atimensec = iattr->ia_atime.tv_nsec;
1475		if (!(ivalid & ATTR_ATIME_SET))
1476			arg->valid |= FATTR_ATIME_NOW;
1477	}
1478	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1479		arg->valid |= FATTR_MTIME;
1480		arg->mtime = iattr->ia_mtime.tv_sec;
1481		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1482		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1483			arg->valid |= FATTR_MTIME_NOW;
1484	}
1485	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1486		arg->valid |= FATTR_CTIME;
1487		arg->ctime = iattr->ia_ctime.tv_sec;
1488		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1489	}
1490}
1491
1492/*
1493 * Prevent concurrent writepages on inode
1494 *
1495 * This is done by adding a negative bias to the inode write counter
1496 * and waiting for all pending writes to finish.
1497 */
1498void fuse_set_nowrite(struct inode *inode)
1499{
1500	struct fuse_conn *fc = get_fuse_conn(inode);
1501	struct fuse_inode *fi = get_fuse_inode(inode);
1502
1503	BUG_ON(!mutex_is_locked(&inode->i_mutex));
1504
1505	spin_lock(&fc->lock);
1506	BUG_ON(fi->writectr < 0);
1507	fi->writectr += FUSE_NOWRITE;
1508	spin_unlock(&fc->lock);
1509	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1510}
1511
1512/*
1513 * Allow writepages on inode
1514 *
1515 * Remove the bias from the writecounter and send any queued
1516 * writepages.
1517 */
1518static void __fuse_release_nowrite(struct inode *inode)
1519{
1520	struct fuse_inode *fi = get_fuse_inode(inode);
1521
1522	BUG_ON(fi->writectr != FUSE_NOWRITE);
1523	fi->writectr = 0;
1524	fuse_flush_writepages(inode);
1525}
1526
1527void fuse_release_nowrite(struct inode *inode)
1528{
1529	struct fuse_conn *fc = get_fuse_conn(inode);
1530
1531	spin_lock(&fc->lock);
1532	__fuse_release_nowrite(inode);
1533	spin_unlock(&fc->lock);
1534}
1535
1536static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1537			      struct inode *inode,
1538			      struct fuse_setattr_in *inarg_p,
1539			      struct fuse_attr_out *outarg_p)
1540{
1541	args->in.h.opcode = FUSE_SETATTR;
1542	args->in.h.nodeid = get_node_id(inode);
1543	args->in.numargs = 1;
1544	args->in.args[0].size = sizeof(*inarg_p);
1545	args->in.args[0].value = inarg_p;
1546	args->out.numargs = 1;
1547	args->out.args[0].size = sizeof(*outarg_p);
1548	args->out.args[0].value = outarg_p;
1549}
1550
1551/*
1552 * Flush inode->i_mtime to the server
1553 */
1554int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1555{
1556	struct fuse_conn *fc = get_fuse_conn(inode);
1557	FUSE_ARGS(args);
1558	struct fuse_setattr_in inarg;
1559	struct fuse_attr_out outarg;
1560
1561	memset(&inarg, 0, sizeof(inarg));
1562	memset(&outarg, 0, sizeof(outarg));
1563
1564	inarg.valid = FATTR_MTIME;
1565	inarg.mtime = inode->i_mtime.tv_sec;
1566	inarg.mtimensec = inode->i_mtime.tv_nsec;
1567	if (fc->minor >= 23) {
1568		inarg.valid |= FATTR_CTIME;
1569		inarg.ctime = inode->i_ctime.tv_sec;
1570		inarg.ctimensec = inode->i_ctime.tv_nsec;
1571	}
1572	if (ff) {
1573		inarg.valid |= FATTR_FH;
1574		inarg.fh = ff->fh;
1575	}
1576	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1577
1578	return fuse_simple_request(fc, &args);
1579}
1580
1581/*
1582 * Set attributes, and at the same time refresh them.
1583 *
1584 * Truncation is slightly complicated, because the 'truncate' request
1585 * may fail, in which case we don't want to touch the mapping.
1586 * vmtruncate() doesn't allow for this case, so do the rlimit checking
1587 * and the actual truncation by hand.
1588 */
1589int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1590		    struct file *file)
1591{
1592	struct fuse_conn *fc = get_fuse_conn(inode);
1593	struct fuse_inode *fi = get_fuse_inode(inode);
1594	FUSE_ARGS(args);
1595	struct fuse_setattr_in inarg;
1596	struct fuse_attr_out outarg;
1597	bool is_truncate = false;
1598	bool is_wb = fc->writeback_cache;
1599	loff_t oldsize;
1600	int err;
1601	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1602
1603	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
1604		attr->ia_valid |= ATTR_FORCE;
1605
1606	err = inode_change_ok(inode, attr);
1607	if (err)
1608		return err;
1609
1610	if (attr->ia_valid & ATTR_OPEN) {
1611		if (fc->atomic_o_trunc)
1612			return 0;
1613		file = NULL;
1614	}
1615
1616	if (attr->ia_valid & ATTR_SIZE)
1617		is_truncate = true;
1618
1619	if (is_truncate) {
1620		fuse_set_nowrite(inode);
1621		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1622		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1623			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1624	}
1625
1626	memset(&inarg, 0, sizeof(inarg));
1627	memset(&outarg, 0, sizeof(outarg));
1628	iattr_to_fattr(attr, &inarg, trust_local_cmtime);
1629	if (file) {
1630		struct fuse_file *ff = file->private_data;
1631		inarg.valid |= FATTR_FH;
1632		inarg.fh = ff->fh;
1633	}
1634	if (attr->ia_valid & ATTR_SIZE) {
1635		/* For mandatory locking in truncate */
1636		inarg.valid |= FATTR_LOCKOWNER;
1637		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1638	}
1639	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1640	err = fuse_simple_request(fc, &args);
1641	if (err) {
1642		if (err == -EINTR)
1643			fuse_invalidate_attr(inode);
1644		goto error;
1645	}
1646
1647	if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1648		make_bad_inode(inode);
1649		err = -EIO;
1650		goto error;
1651	}
1652
1653	spin_lock(&fc->lock);
1654	/* the kernel maintains i_mtime locally */
1655	if (trust_local_cmtime) {
1656		if (attr->ia_valid & ATTR_MTIME)
1657			inode->i_mtime = attr->ia_mtime;
1658		if (attr->ia_valid & ATTR_CTIME)
1659			inode->i_ctime = attr->ia_ctime;
1660		/* FIXME: clear I_DIRTY_SYNC? */
1661	}
1662
1663	fuse_change_attributes_common(inode, &outarg.attr,
1664				      attr_timeout(&outarg));
1665	oldsize = inode->i_size;
1666	/* see the comment in fuse_change_attributes() */
1667	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1668		i_size_write(inode, outarg.attr.size);
1669
1670	if (is_truncate) {
1671		/* NOTE: this may release/reacquire fc->lock */
1672		__fuse_release_nowrite(inode);
1673	}
1674	spin_unlock(&fc->lock);
1675
1676	/*
1677	 * Only call invalidate_inode_pages2() after removing
1678	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1679	 */
1680	if ((is_truncate || !is_wb) &&
1681	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1682		truncate_pagecache(inode, outarg.attr.size);
1683		invalidate_inode_pages2(inode->i_mapping);
1684	}
1685
1686	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1687	return 0;
1688
1689error:
1690	if (is_truncate)
1691		fuse_release_nowrite(inode);
1692
1693	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1694	return err;
1695}
1696
1697static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1698{
1699	struct inode *inode = d_inode(entry);
1700
1701	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1702		return -EACCES;
1703
1704	if (attr->ia_valid & ATTR_FILE)
1705		return fuse_do_setattr(inode, attr, attr->ia_file);
1706	else
1707		return fuse_do_setattr(inode, attr, NULL);
1708}
1709
1710static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1711			struct kstat *stat)
1712{
1713	struct inode *inode = d_inode(entry);
1714	struct fuse_conn *fc = get_fuse_conn(inode);
1715
1716	if (!fuse_allow_current_process(fc))
1717		return -EACCES;
1718
1719	return fuse_update_attributes(inode, stat, NULL, NULL);
1720}
1721
1722static int fuse_setxattr(struct dentry *entry, const char *name,
1723			 const void *value, size_t size, int flags)
1724{
1725	struct inode *inode = d_inode(entry);
1726	struct fuse_conn *fc = get_fuse_conn(inode);
1727	FUSE_ARGS(args);
1728	struct fuse_setxattr_in inarg;
1729	int err;
1730
1731	if (fc->no_setxattr)
1732		return -EOPNOTSUPP;
1733
1734	memset(&inarg, 0, sizeof(inarg));
1735	inarg.size = size;
1736	inarg.flags = flags;
1737	args.in.h.opcode = FUSE_SETXATTR;
1738	args.in.h.nodeid = get_node_id(inode);
1739	args.in.numargs = 3;
1740	args.in.args[0].size = sizeof(inarg);
1741	args.in.args[0].value = &inarg;
1742	args.in.args[1].size = strlen(name) + 1;
1743	args.in.args[1].value = name;
1744	args.in.args[2].size = size;
1745	args.in.args[2].value = value;
1746	err = fuse_simple_request(fc, &args);
1747	if (err == -ENOSYS) {
1748		fc->no_setxattr = 1;
1749		err = -EOPNOTSUPP;
1750	}
1751	if (!err) {
1752		fuse_invalidate_attr(inode);
1753		fuse_update_ctime(inode);
1754	}
1755	return err;
1756}
1757
1758static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1759			     void *value, size_t size)
1760{
1761	struct inode *inode = d_inode(entry);
1762	struct fuse_conn *fc = get_fuse_conn(inode);
1763	FUSE_ARGS(args);
1764	struct fuse_getxattr_in inarg;
1765	struct fuse_getxattr_out outarg;
1766	ssize_t ret;
1767
1768	if (fc->no_getxattr)
1769		return -EOPNOTSUPP;
1770
1771	memset(&inarg, 0, sizeof(inarg));
1772	inarg.size = size;
1773	args.in.h.opcode = FUSE_GETXATTR;
1774	args.in.h.nodeid = get_node_id(inode);
1775	args.in.numargs = 2;
1776	args.in.args[0].size = sizeof(inarg);
1777	args.in.args[0].value = &inarg;
1778	args.in.args[1].size = strlen(name) + 1;
1779	args.in.args[1].value = name;
1780	/* This is really two different operations rolled into one */
1781	args.out.numargs = 1;
1782	if (size) {
1783		args.out.argvar = 1;
1784		args.out.args[0].size = size;
1785		args.out.args[0].value = value;
1786	} else {
1787		args.out.args[0].size = sizeof(outarg);
1788		args.out.args[0].value = &outarg;
1789	}
1790	ret = fuse_simple_request(fc, &args);
1791	if (!ret && !size)
1792		ret = outarg.size;
1793	if (ret == -ENOSYS) {
1794		fc->no_getxattr = 1;
1795		ret = -EOPNOTSUPP;
1796	}
1797	return ret;
1798}
1799
1800static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1801{
1802	struct inode *inode = d_inode(entry);
1803	struct fuse_conn *fc = get_fuse_conn(inode);
1804	FUSE_ARGS(args);
1805	struct fuse_getxattr_in inarg;
1806	struct fuse_getxattr_out outarg;
1807	ssize_t ret;
1808
1809	if (!fuse_allow_current_process(fc))
1810		return -EACCES;
1811
1812	if (fc->no_listxattr)
1813		return -EOPNOTSUPP;
1814
1815	memset(&inarg, 0, sizeof(inarg));
1816	inarg.size = size;
1817	args.in.h.opcode = FUSE_LISTXATTR;
1818	args.in.h.nodeid = get_node_id(inode);
1819	args.in.numargs = 1;
1820	args.in.args[0].size = sizeof(inarg);
1821	args.in.args[0].value = &inarg;
1822	/* This is really two different operations rolled into one */
1823	args.out.numargs = 1;
1824	if (size) {
1825		args.out.argvar = 1;
1826		args.out.args[0].size = size;
1827		args.out.args[0].value = list;
1828	} else {
1829		args.out.args[0].size = sizeof(outarg);
1830		args.out.args[0].value = &outarg;
1831	}
1832	ret = fuse_simple_request(fc, &args);
1833	if (!ret && !size)
1834		ret = outarg.size;
1835	if (ret == -ENOSYS) {
1836		fc->no_listxattr = 1;
1837		ret = -EOPNOTSUPP;
1838	}
1839	return ret;
1840}
1841
1842static int fuse_removexattr(struct dentry *entry, const char *name)
1843{
1844	struct inode *inode = d_inode(entry);
1845	struct fuse_conn *fc = get_fuse_conn(inode);
1846	FUSE_ARGS(args);
1847	int err;
1848
1849	if (fc->no_removexattr)
1850		return -EOPNOTSUPP;
1851
1852	args.in.h.opcode = FUSE_REMOVEXATTR;
1853	args.in.h.nodeid = get_node_id(inode);
1854	args.in.numargs = 1;
1855	args.in.args[0].size = strlen(name) + 1;
1856	args.in.args[0].value = name;
1857	err = fuse_simple_request(fc, &args);
1858	if (err == -ENOSYS) {
1859		fc->no_removexattr = 1;
1860		err = -EOPNOTSUPP;
1861	}
1862	if (!err) {
1863		fuse_invalidate_attr(inode);
1864		fuse_update_ctime(inode);
1865	}
1866	return err;
1867}
1868
1869static const struct inode_operations fuse_dir_inode_operations = {
1870	.lookup		= fuse_lookup,
1871	.mkdir		= fuse_mkdir,
1872	.symlink	= fuse_symlink,
1873	.unlink		= fuse_unlink,
1874	.rmdir		= fuse_rmdir,
1875	.rename2	= fuse_rename2,
1876	.link		= fuse_link,
1877	.setattr	= fuse_setattr,
1878	.create		= fuse_create,
1879	.atomic_open	= fuse_atomic_open,
1880	.mknod		= fuse_mknod,
1881	.permission	= fuse_permission,
1882	.getattr	= fuse_getattr,
1883	.setxattr	= fuse_setxattr,
1884	.getxattr	= fuse_getxattr,
1885	.listxattr	= fuse_listxattr,
1886	.removexattr	= fuse_removexattr,
1887};
1888
1889static const struct file_operations fuse_dir_operations = {
1890	.llseek		= generic_file_llseek,
1891	.read		= generic_read_dir,
1892	.iterate	= fuse_readdir,
1893	.open		= fuse_dir_open,
1894	.release	= fuse_dir_release,
1895	.fsync		= fuse_dir_fsync,
1896	.unlocked_ioctl	= fuse_dir_ioctl,
1897	.compat_ioctl	= fuse_dir_compat_ioctl,
1898};
1899
1900static const struct inode_operations fuse_common_inode_operations = {
1901	.setattr	= fuse_setattr,
1902	.permission	= fuse_permission,
1903	.getattr	= fuse_getattr,
1904	.setxattr	= fuse_setxattr,
1905	.getxattr	= fuse_getxattr,
1906	.listxattr	= fuse_listxattr,
1907	.removexattr	= fuse_removexattr,
1908};
1909
1910static const struct inode_operations fuse_symlink_inode_operations = {
1911	.setattr	= fuse_setattr,
1912	.follow_link	= fuse_follow_link,
1913	.put_link	= free_page_put_link,
1914	.readlink	= generic_readlink,
1915	.getattr	= fuse_getattr,
1916	.setxattr	= fuse_setxattr,
1917	.getxattr	= fuse_getxattr,
1918	.listxattr	= fuse_listxattr,
1919	.removexattr	= fuse_removexattr,
1920};
1921
1922void fuse_init_common(struct inode *inode)
1923{
1924	inode->i_op = &fuse_common_inode_operations;
1925}
1926
1927void fuse_init_dir(struct inode *inode)
1928{
1929	inode->i_op = &fuse_dir_inode_operations;
1930	inode->i_fop = &fuse_dir_operations;
1931}
1932
1933void fuse_init_symlink(struct inode *inode)
1934{
1935	inode->i_op = &fuse_symlink_inode_operations;
1936}
1937