1/*
2 * fs/f2fs/file.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 *             http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/stat.h>
14#include <linux/buffer_head.h>
15#include <linux/writeback.h>
16#include <linux/blkdev.h>
17#include <linux/falloc.h>
18#include <linux/types.h>
19#include <linux/compat.h>
20#include <linux/uaccess.h>
21#include <linux/mount.h>
22#include <linux/pagevec.h>
23
24#include "f2fs.h"
25#include "node.h"
26#include "segment.h"
27#include "xattr.h"
28#include "acl.h"
29#include "trace.h"
30#include <trace/events/f2fs.h>
31
32static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
33						struct vm_fault *vmf)
34{
35	struct page *page = vmf->page;
36	struct inode *inode = file_inode(vma->vm_file);
37	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
38	struct dnode_of_data dn;
39	int err;
40
41	f2fs_balance_fs(sbi);
42
43	sb_start_pagefault(inode->i_sb);
44
45	f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
46
47	/* block allocation */
48	f2fs_lock_op(sbi);
49	set_new_dnode(&dn, inode, NULL, NULL, 0);
50	err = f2fs_reserve_block(&dn, page->index);
51	if (err) {
52		f2fs_unlock_op(sbi);
53		goto out;
54	}
55	f2fs_put_dnode(&dn);
56	f2fs_unlock_op(sbi);
57
58	file_update_time(vma->vm_file);
59	lock_page(page);
60	if (unlikely(page->mapping != inode->i_mapping ||
61			page_offset(page) > i_size_read(inode) ||
62			!PageUptodate(page))) {
63		unlock_page(page);
64		err = -EFAULT;
65		goto out;
66	}
67
68	/*
69	 * check to see if the page is mapped already (no holes)
70	 */
71	if (PageMappedToDisk(page))
72		goto mapped;
73
74	/* page is wholly or partially inside EOF */
75	if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) {
76		unsigned offset;
77		offset = i_size_read(inode) & ~PAGE_CACHE_MASK;
78		zero_user_segment(page, offset, PAGE_CACHE_SIZE);
79	}
80	set_page_dirty(page);
81	SetPageUptodate(page);
82
83	trace_f2fs_vm_page_mkwrite(page, DATA);
84mapped:
85	/* fill the page */
86	f2fs_wait_on_page_writeback(page, DATA);
87out:
88	sb_end_pagefault(inode->i_sb);
89	return block_page_mkwrite_return(err);
90}
91
/*
 * VM operations installed on f2fs file mappings by f2fs_file_mmap().
 * Read faults use the generic filemap helpers; only the write-enable path
 * (page_mkwrite) is f2fs specific.
 */
static const struct vm_operations_struct f2fs_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= f2fs_vm_page_mkwrite,
};
97
98static int get_parent_ino(struct inode *inode, nid_t *pino)
99{
100	struct dentry *dentry;
101
102	inode = igrab(inode);
103	dentry = d_find_any_alias(inode);
104	iput(inode);
105	if (!dentry)
106		return 0;
107
108	if (update_dent_inode(inode, &dentry->d_name)) {
109		dput(dentry);
110		return 0;
111	}
112
113	*pino = parent_ino(dentry);
114	dput(dentry);
115	return 1;
116}
117
118static inline bool need_do_checkpoint(struct inode *inode)
119{
120	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
121	bool need_cp = false;
122
123	if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
124		need_cp = true;
125	else if (file_wrong_pino(inode))
126		need_cp = true;
127	else if (!space_for_roll_forward(sbi))
128		need_cp = true;
129	else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
130		need_cp = true;
131	else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
132		need_cp = true;
133	else if (test_opt(sbi, FASTBOOT))
134		need_cp = true;
135	else if (sbi->active_logs == 2)
136		need_cp = true;
137
138	return need_cp;
139}
140
141static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
142{
143	struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
144	bool ret = false;
145	/* But we need to avoid that there are some inode updates */
146	if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino))
147		ret = true;
148	f2fs_put_page(i, 0);
149	return ret;
150}
151
152static void try_to_fix_pino(struct inode *inode)
153{
154	struct f2fs_inode_info *fi = F2FS_I(inode);
155	nid_t pino;
156
157	down_write(&fi->i_sem);
158	fi->xattr_ver = 0;
159	if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
160			get_parent_ino(inode, &pino)) {
161		fi->i_pino = pino;
162		file_got_pino(inode);
163		up_write(&fi->i_sem);
164
165		mark_inode_dirty_sync(inode);
166		f2fs_write_inode(inode, NULL);
167	} else {
168		up_write(&fi->i_sem);
169	}
170}
171
/*
 * fsync()/fdatasync() implementation for f2fs files.
 *
 * @file:     file being synced
 * @start:    first byte of the range whose data pages are written back
 * @end:      last byte of that range
 * @datasync: non-zero for fdatasync semantics
 *
 * Data pages in [start, end] are written and waited on first.  After that
 * the function either returns early (nothing to record), issues only a
 * flush, triggers a full checkpoint via f2fs_sync_fs(), or writes the
 * inode's node pages and then issues a flush.
 *
 * Returns 0 on success (also on a read-only filesystem) or the error from
 * the step that failed.
 */
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t ino = inode->i_ino;
	int ret = 0;
	bool need_cp = false;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};

	if (unlikely(f2fs_readonly(inode->i_sb)))
		return 0;

	trace_f2fs_sync_file_enter(inode);

	/*
	 * if only a few pages are dirty (at most min_fsync_blocks), ask the
	 * data writeback below to do in-place-update
	 */
	if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
		set_inode_flag(fi, FI_NEED_IPU);
	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
	clear_inode_flag(fi, FI_NEED_IPU);

	if (ret) {
		trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
		return ret;
	}

	/* if the inode is dirty, let's recover all the time */
	if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) {
		update_inode_page(inode);
		goto go_write;
	}

	/*
	 * if there is no written data, don't waste time to write recovery info.
	 */
	if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
			!exist_written_data(sbi, ino, APPEND_INO)) {

		/* it may call write_inode just prior to fsync */
		if (need_inode_page_update(sbi, ino))
			goto go_write;

		/* only in-place updates happened: a flush is all we need */
		if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
				exist_written_data(sbi, ino, UPDATE_INO))
			goto flush_out;
		goto out;
	}
go_write:
	/* guarantee free sections for fsync */
	f2fs_balance_fs(sbi);

	/*
	 * Both of fdatasync() and fsync() are able to be recovered from
	 * sudden-power-off.
	 */
	down_read(&fi->i_sem);
	need_cp = need_do_checkpoint(inode);
	up_read(&fi->i_sem);

	if (need_cp) {
		/* all the dirty node pages should be flushed for POR */
		ret = f2fs_sync_fs(inode->i_sb, 1);

		/*
		 * We've secured consistency through sync_fs. Following pino
		 * will be used only for fsynced inodes after checkpoint.
		 */
		try_to_fix_pino(inode);
		clear_inode_flag(fi, FI_APPEND_WRITE);
		clear_inode_flag(fi, FI_UPDATE_WRITE);
		goto out;
	}
sync_nodes:
	sync_node_pages(sbi, ino, &wbc);

	/* if cp_error was enabled, we should avoid infinite loop */
	if (unlikely(f2fs_cp_error(sbi)))
		goto out;

	/* the inode block changed again meanwhile: rewrite it and retry */
	if (need_inode_block_update(sbi, ino)) {
		mark_inode_dirty_sync(inode);
		f2fs_write_inode(inode, NULL);
		goto sync_nodes;
	}

	ret = wait_on_node_pages_writeback(sbi, ino);
	if (ret)
		goto out;

	/* once recovery info is written, we don't need to track this */
	remove_dirty_inode(sbi, ino, APPEND_INO);
	clear_inode_flag(fi, FI_APPEND_WRITE);
flush_out:
	remove_dirty_inode(sbi, ino, UPDATE_INO);
	clear_inode_flag(fi, FI_UPDATE_WRITE);
	ret = f2fs_issue_flush(sbi);
out:
	trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
	f2fs_trace_ios(NULL, NULL, 1);
	return ret;
}
277
278static pgoff_t __get_first_dirty_index(struct address_space *mapping,
279						pgoff_t pgofs, int whence)
280{
281	struct pagevec pvec;
282	int nr_pages;
283
284	if (whence != SEEK_DATA)
285		return 0;
286
287	/* find first dirty page index */
288	pagevec_init(&pvec, 0);
289	nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
290					PAGECACHE_TAG_DIRTY, 1);
291	pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX;
292	pagevec_release(&pvec);
293	return pgofs;
294}
295
296static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
297							int whence)
298{
299	switch (whence) {
300	case SEEK_DATA:
301		if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
302			(blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
303			return true;
304		break;
305	case SEEK_HOLE:
306		if (blkaddr == NULL_ADDR)
307			return true;
308		break;
309	}
310	return false;
311}
312
313static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
314{
315	struct inode *inode = file->f_mapping->host;
316	loff_t maxbytes = inode->i_sb->s_maxbytes;
317	struct dnode_of_data dn;
318	pgoff_t pgofs, end_offset, dirty;
319	loff_t data_ofs = offset;
320	loff_t isize;
321	int err = 0;
322
323	mutex_lock(&inode->i_mutex);
324
325	isize = i_size_read(inode);
326	if (offset >= isize)
327		goto fail;
328
329	/* handle inline data case */
330	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
331		if (whence == SEEK_HOLE)
332			data_ofs = isize;
333		goto found;
334	}
335
336	pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT);
337
338	dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
339
340	for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) {
341		set_new_dnode(&dn, inode, NULL, NULL, 0);
342		err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
343		if (err && err != -ENOENT) {
344			goto fail;
345		} else if (err == -ENOENT) {
346			/* direct node does not exists */
347			if (whence == SEEK_DATA) {
348				pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
349							F2FS_I(inode));
350				continue;
351			} else {
352				goto found;
353			}
354		}
355
356		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
357
358		/* find data/hole in dnode block */
359		for (; dn.ofs_in_node < end_offset;
360				dn.ofs_in_node++, pgofs++,
361				data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
362			block_t blkaddr;
363			blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
364
365			if (__found_offset(blkaddr, dirty, pgofs, whence)) {
366				f2fs_put_dnode(&dn);
367				goto found;
368			}
369		}
370		f2fs_put_dnode(&dn);
371	}
372
373	if (whence == SEEK_DATA)
374		goto fail;
375found:
376	if (whence == SEEK_HOLE && data_ofs > isize)
377		data_ofs = isize;
378	mutex_unlock(&inode->i_mutex);
379	return vfs_setpos(file, data_ofs, maxbytes);
380fail:
381	mutex_unlock(&inode->i_mutex);
382	return -ENXIO;
383}
384
385static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
386{
387	struct inode *inode = file->f_mapping->host;
388	loff_t maxbytes = inode->i_sb->s_maxbytes;
389
390	switch (whence) {
391	case SEEK_SET:
392	case SEEK_CUR:
393	case SEEK_END:
394		return generic_file_llseek_size(file, offset, whence,
395						maxbytes, i_size_read(inode));
396	case SEEK_DATA:
397	case SEEK_HOLE:
398		if (offset < 0)
399			return -ENXIO;
400		return f2fs_seek_block(file, offset, whence);
401	}
402
403	return -EINVAL;
404}
405
406static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
407{
408	struct inode *inode = file_inode(file);
409
410	/* we don't need to use inline_data strictly */
411	if (f2fs_has_inline_data(inode)) {
412		int err = f2fs_convert_inline_inode(inode);
413		if (err)
414			return err;
415	}
416
417	file_accessed(file);
418	vma->vm_ops = &f2fs_file_vm_ops;
419	return 0;
420}
421
422int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
423{
424	int nr_free = 0, ofs = dn->ofs_in_node;
425	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
426	struct f2fs_node *raw_node;
427	__le32 *addr;
428
429	raw_node = F2FS_NODE(dn->node_page);
430	addr = blkaddr_in_node(raw_node) + ofs;
431
432	for (; count > 0; count--, addr++, dn->ofs_in_node++) {
433		block_t blkaddr = le32_to_cpu(*addr);
434		if (blkaddr == NULL_ADDR)
435			continue;
436
437		dn->data_blkaddr = NULL_ADDR;
438		set_data_blkaddr(dn);
439		f2fs_update_extent_cache(dn);
440		invalidate_blocks(sbi, blkaddr);
441		if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
442			clear_inode_flag(F2FS_I(dn->inode),
443						FI_FIRST_BLOCK_WRITTEN);
444		nr_free++;
445	}
446	if (nr_free) {
447		dec_valid_block_count(sbi, dn->inode, nr_free);
448		set_page_dirty(dn->node_page);
449		sync_inode_page(dn);
450	}
451	dn->ofs_in_node = ofs;
452
453	trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
454					 dn->ofs_in_node, nr_free);
455	return nr_free;
456}
457
/*
 * Truncate ADDRS_PER_BLOCK block addresses starting at dn->ofs_in_node.
 * The number of freed blocks reported by truncate_data_blocks_range() is
 * discarded.
 */
void truncate_data_blocks(struct dnode_of_data *dn)
{
	truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
}
462
463static int truncate_partial_data_page(struct inode *inode, u64 from,
464								bool force)
465{
466	unsigned offset = from & (PAGE_CACHE_SIZE - 1);
467	struct page *page;
468
469	if (!offset && !force)
470		return 0;
471
472	page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, force);
473	if (IS_ERR(page))
474		return 0;
475
476	lock_page(page);
477	if (unlikely(!PageUptodate(page) ||
478			page->mapping != inode->i_mapping))
479		goto out;
480
481	f2fs_wait_on_page_writeback(page, DATA);
482	zero_user(page, offset, PAGE_CACHE_SIZE - offset);
483	if (!force)
484		set_page_dirty(page);
485out:
486	f2fs_put_page(page, 1);
487	return 0;
488}
489
/*
 * Free the blocks of @inode that lie beyond byte offset @from.
 *
 * @inode: inode to truncate
 * @from:  new end offset in bytes
 * @lock:  when true, f2fs_lock_op()/f2fs_unlock_op() are taken around the
 *         block manipulation here
 *
 * On success the remainder of the page containing @from is zeroed via
 * truncate_partial_data_page().  Returns 0 or a negative error.
 */
int truncate_blocks(struct inode *inode, u64 from, bool lock)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int blocksize = inode->i_sb->s_blocksize;
	struct dnode_of_data dn;
	pgoff_t free_from;
	int count = 0, err = 0;
	struct page *ipage;
	bool truncate_page = false;

	trace_f2fs_truncate_blocks_enter(inode, from);

	/* bias @from by (blocksize - 1) so a partial last block is kept */
	free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1);

	if (lock)
		f2fs_lock_op(sbi);

	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto out;
	}

	/* inline data lives in the inode page: truncate it there and stop */
	if (f2fs_has_inline_data(inode)) {
		if (truncate_inline_inode(ipage, from))
			set_page_dirty(ipage);
		f2fs_put_page(ipage, 1);
		truncate_page = true;
		goto out;
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
	if (err) {
		/* no dnode at free_from: go straight to the node-level pass */
		if (err == -ENOENT)
			goto free_next;
		goto out;
	}

	/* number of address slots from free_from to the end of this dnode */
	count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));

	count -= dn.ofs_in_node;
	f2fs_bug_on(sbi, count < 0);

	/*
	 * truncate the tail of this dnode here only when it is partially
	 * covered or is the inode block itself
	 */
	if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
		truncate_data_blocks_range(&dn, count);
		free_from += count;
	}

	f2fs_put_dnode(&dn);
free_next:
	err = truncate_inode_blocks(inode, free_from);
out:
	if (lock)
		f2fs_unlock_op(sbi);

	/* lastly zero out the first data page */
	if (!err)
		err = truncate_partial_data_page(inode, from, truncate_page);

	trace_f2fs_truncate_blocks_exit(inode, err);
	return err;
}
553
554void f2fs_truncate(struct inode *inode)
555{
556	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
557				S_ISLNK(inode->i_mode)))
558		return;
559
560	trace_f2fs_truncate(inode);
561
562	/* we should check inline_data size */
563	if (f2fs_has_inline_data(inode) && !f2fs_may_inline(inode)) {
564		if (f2fs_convert_inline_inode(inode))
565			return;
566	}
567
568	if (!truncate_blocks(inode, i_size_read(inode), true)) {
569		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
570		mark_inode_dirty(inode);
571	}
572}
573
574int f2fs_getattr(struct vfsmount *mnt,
575			 struct dentry *dentry, struct kstat *stat)
576{
577	struct inode *inode = d_inode(dentry);
578	generic_fillattr(inode, stat);
579	stat->blocks <<= 3;
580	return 0;
581}
582
#ifdef CONFIG_F2FS_FS_POSIX_ACL
/*
 * Copy the attributes selected by attr->ia_valid into @inode.
 *
 * Timestamps are truncated to the superblock's time granularity.  A new
 * mode is not written to i_mode directly but handed to set_acl_inode();
 * the setgid bit is stripped first unless the caller is in the inode's
 * group or has CAP_FSETID.
 */
static void __setattr_copy(struct inode *inode, const struct iattr *attr)
{
	const unsigned int valid = attr->ia_valid;
	const unsigned int gran = inode->i_sb->s_time_gran;
	umode_t mode;

	if (valid & ATTR_UID)
		inode->i_uid = attr->ia_uid;
	if (valid & ATTR_GID)
		inode->i_gid = attr->ia_gid;

	if (valid & ATTR_ATIME)
		inode->i_atime = timespec_trunc(attr->ia_atime, gran);
	if (valid & ATTR_MTIME)
		inode->i_mtime = timespec_trunc(attr->ia_mtime, gran);
	if (valid & ATTR_CTIME)
		inode->i_ctime = timespec_trunc(attr->ia_ctime, gran);

	if (!(valid & ATTR_MODE))
		return;

	mode = attr->ia_mode;
	/* note: this checks the gid as possibly just updated above */
	if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
		mode &= ~S_ISGID;
	set_acl_inode(F2FS_I(inode), mode);
}
#else
#define __setattr_copy setattr_copy
#endif
613
/*
 * ->setattr for f2fs inodes.
 *
 * Validates the change with inode_change_ok(), handles a size change by
 * truncating, copies the remaining attributes and, for a mode change,
 * updates the ACL through posix_acl_chmod().  The inode is marked dirty on
 * every path past the initial validation.
 *
 * Returns the inode_change_ok() error, or the posix_acl_chmod() result when
 * the mode was changed; 0 otherwise.
 */
int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int err;

	err = inode_change_ok(inode, attr);
	if (err)
		return err;

	if (attr->ia_valid & ATTR_SIZE) {
		if (attr->ia_size != i_size_read(inode)) {
			/* size really changes: set it, then drop the blocks */
			truncate_setsize(inode, attr->ia_size);
			f2fs_truncate(inode);
			f2fs_balance_fs(F2FS_I_SB(inode));
		} else {
			/*
			 * giving a chance to truncate blocks past EOF which
			 * are fallocated with FALLOC_FL_KEEP_SIZE.
			 */
			f2fs_truncate(inode);
		}
	}

	__setattr_copy(inode, attr);

	if (attr->ia_valid & ATTR_MODE) {
		err = posix_acl_chmod(inode, get_inode_mode(inode));
		/*
		 * on failure, or when a mode is still pending in i_acl_mode,
		 * write that mode to i_mode and clear the pending flag
		 */
		if (err || is_inode_flag_set(fi, FI_ACL_MODE)) {
			inode->i_mode = fi->i_acl_mode;
			clear_inode_flag(fi, FI_ACL_MODE);
		}
	}

	mark_inode_dirty(inode);
	return err;
}
651
/*
 * Inode operations for f2fs files.  The xattr handlers are only wired up
 * when CONFIG_F2FS_FS_XATTR is enabled.
 */
const struct inode_operations f2fs_file_inode_operations = {
	.getattr	= f2fs_getattr,
	.setattr	= f2fs_setattr,
	.get_acl	= f2fs_get_acl,
	.set_acl	= f2fs_set_acl,
#ifdef CONFIG_F2FS_FS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= f2fs_listxattr,
	.removexattr	= generic_removexattr,
#endif
	.fiemap		= f2fs_fiemap,
};
665
666static void fill_zero(struct inode *inode, pgoff_t index,
667					loff_t start, loff_t len)
668{
669	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
670	struct page *page;
671
672	if (!len)
673		return;
674
675	f2fs_balance_fs(sbi);
676
677	f2fs_lock_op(sbi);
678	page = get_new_data_page(inode, NULL, index, false);
679	f2fs_unlock_op(sbi);
680
681	if (!IS_ERR(page)) {
682		f2fs_wait_on_page_writeback(page, DATA);
683		zero_user(page, start, len);
684		set_page_dirty(page);
685		f2fs_put_page(page, 1);
686	}
687}
688
689int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
690{
691	pgoff_t index;
692	int err;
693
694	for (index = pg_start; index < pg_end; index++) {
695		struct dnode_of_data dn;
696
697		set_new_dnode(&dn, inode, NULL, NULL, 0);
698		err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
699		if (err) {
700			if (err == -ENOENT)
701				continue;
702			return err;
703		}
704
705		if (dn.data_blkaddr != NULL_ADDR)
706			truncate_data_blocks_range(&dn, 1);
707		f2fs_put_dnode(&dn);
708	}
709	return 0;
710}
711
712static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
713{
714	pgoff_t pg_start, pg_end;
715	loff_t off_start, off_end;
716	int ret = 0;
717
718	if (!S_ISREG(inode->i_mode))
719		return -EOPNOTSUPP;
720
721	/* skip punching hole beyond i_size */
722	if (offset >= inode->i_size)
723		return ret;
724
725	if (f2fs_has_inline_data(inode)) {
726		ret = f2fs_convert_inline_inode(inode);
727		if (ret)
728			return ret;
729	}
730
731	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
732	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
733
734	off_start = offset & (PAGE_CACHE_SIZE - 1);
735	off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
736
737	if (pg_start == pg_end) {
738		fill_zero(inode, pg_start, off_start,
739						off_end - off_start);
740	} else {
741		if (off_start)
742			fill_zero(inode, pg_start++, off_start,
743					PAGE_CACHE_SIZE - off_start);
744		if (off_end)
745			fill_zero(inode, pg_end, 0, off_end);
746
747		if (pg_start < pg_end) {
748			struct address_space *mapping = inode->i_mapping;
749			loff_t blk_start, blk_end;
750			struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
751
752			f2fs_balance_fs(sbi);
753
754			blk_start = pg_start << PAGE_CACHE_SHIFT;
755			blk_end = pg_end << PAGE_CACHE_SHIFT;
756			truncate_inode_pages_range(mapping, blk_start,
757					blk_end - 1);
758
759			f2fs_lock_op(sbi);
760			ret = truncate_hole(inode, pg_start, pg_end);
761			f2fs_unlock_op(sbi);
762		}
763	}
764
765	return ret;
766}
767
768static int expand_inode_data(struct inode *inode, loff_t offset,
769					loff_t len, int mode)
770{
771	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
772	pgoff_t index, pg_start, pg_end;
773	loff_t new_size = i_size_read(inode);
774	loff_t off_start, off_end;
775	int ret = 0;
776
777	f2fs_balance_fs(sbi);
778
779	ret = inode_newsize_ok(inode, (len + offset));
780	if (ret)
781		return ret;
782
783	if (f2fs_has_inline_data(inode)) {
784		ret = f2fs_convert_inline_inode(inode);
785		if (ret)
786			return ret;
787	}
788
789	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
790	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
791
792	off_start = offset & (PAGE_CACHE_SIZE - 1);
793	off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
794
795	f2fs_lock_op(sbi);
796
797	for (index = pg_start; index <= pg_end; index++) {
798		struct dnode_of_data dn;
799
800		if (index == pg_end && !off_end)
801			goto noalloc;
802
803		set_new_dnode(&dn, inode, NULL, NULL, 0);
804		ret = f2fs_reserve_block(&dn, index);
805		if (ret)
806			break;
807noalloc:
808		if (pg_start == pg_end)
809			new_size = offset + len;
810		else if (index == pg_start && off_start)
811			new_size = (index + 1) << PAGE_CACHE_SHIFT;
812		else if (index == pg_end)
813			new_size = (index << PAGE_CACHE_SHIFT) + off_end;
814		else
815			new_size += PAGE_CACHE_SIZE;
816	}
817
818	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
819		i_size_read(inode) < new_size) {
820		i_size_write(inode, new_size);
821		mark_inode_dirty(inode);
822		update_inode_page(inode);
823	}
824	f2fs_unlock_op(sbi);
825
826	return ret;
827}
828
829static long f2fs_fallocate(struct file *file, int mode,
830				loff_t offset, loff_t len)
831{
832	struct inode *inode = file_inode(file);
833	long ret;
834
835	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
836		return -EOPNOTSUPP;
837
838	mutex_lock(&inode->i_mutex);
839
840	if (mode & FALLOC_FL_PUNCH_HOLE)
841		ret = punch_hole(inode, offset, len);
842	else
843		ret = expand_inode_data(inode, offset, len, mode);
844
845	if (!ret) {
846		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
847		mark_inode_dirty(inode);
848	}
849
850	mutex_unlock(&inode->i_mutex);
851
852	trace_f2fs_fallocate(inode, mode, offset, len, ret);
853	return ret;
854}
855
856static int f2fs_release_file(struct inode *inode, struct file *filp)
857{
858	/* some remained atomic pages should discarded */
859	if (f2fs_is_atomic_file(inode))
860		commit_inmem_pages(inode, true);
861	if (f2fs_is_volatile_file(inode)) {
862		set_inode_flag(F2FS_I(inode), FI_DROP_CACHE);
863		filemap_fdatawrite(inode->i_mapping);
864		clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE);
865	}
866	return 0;
867}
868
869#define F2FS_REG_FLMASK		(~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
870#define F2FS_OTHER_FLMASK	(FS_NODUMP_FL | FS_NOATIME_FL)
871
872static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
873{
874	if (S_ISDIR(mode))
875		return flags;
876	else if (S_ISREG(mode))
877		return flags & F2FS_REG_FLMASK;
878	else
879		return flags & F2FS_OTHER_FLMASK;
880}
881
882static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
883{
884	struct inode *inode = file_inode(filp);
885	struct f2fs_inode_info *fi = F2FS_I(inode);
886	unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
887	return put_user(flags, (int __user *)arg);
888}
889
890static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
891{
892	struct inode *inode = file_inode(filp);
893	struct f2fs_inode_info *fi = F2FS_I(inode);
894	unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
895	unsigned int oldflags;
896	int ret;
897
898	ret = mnt_want_write_file(filp);
899	if (ret)
900		return ret;
901
902	if (!inode_owner_or_capable(inode)) {
903		ret = -EACCES;
904		goto out;
905	}
906
907	if (get_user(flags, (int __user *)arg)) {
908		ret = -EFAULT;
909		goto out;
910	}
911
912	flags = f2fs_mask_flags(inode->i_mode, flags);
913
914	mutex_lock(&inode->i_mutex);
915
916	oldflags = fi->i_flags;
917
918	if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
919		if (!capable(CAP_LINUX_IMMUTABLE)) {
920			mutex_unlock(&inode->i_mutex);
921			ret = -EPERM;
922			goto out;
923		}
924	}
925
926	flags = flags & FS_FL_USER_MODIFIABLE;
927	flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
928	fi->i_flags = flags;
929	mutex_unlock(&inode->i_mutex);
930
931	f2fs_set_inode_flags(inode);
932	inode->i_ctime = CURRENT_TIME;
933	mark_inode_dirty(inode);
934out:
935	mnt_drop_write_file(filp);
936	return ret;
937}
938
939static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
940{
941	struct inode *inode = file_inode(filp);
942
943	return put_user(inode->i_generation, (int __user *)arg);
944}
945
946static int f2fs_ioc_start_atomic_write(struct file *filp)
947{
948	struct inode *inode = file_inode(filp);
949
950	if (!inode_owner_or_capable(inode))
951		return -EACCES;
952
953	f2fs_balance_fs(F2FS_I_SB(inode));
954
955	if (f2fs_is_atomic_file(inode))
956		return 0;
957
958	set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
959
960	return f2fs_convert_inline_inode(inode);
961}
962
963static int f2fs_ioc_commit_atomic_write(struct file *filp)
964{
965	struct inode *inode = file_inode(filp);
966	int ret;
967
968	if (!inode_owner_or_capable(inode))
969		return -EACCES;
970
971	if (f2fs_is_volatile_file(inode))
972		return 0;
973
974	ret = mnt_want_write_file(filp);
975	if (ret)
976		return ret;
977
978	if (f2fs_is_atomic_file(inode))
979		commit_inmem_pages(inode, false);
980
981	ret = f2fs_sync_file(filp, 0, LONG_MAX, 0);
982	mnt_drop_write_file(filp);
983	clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
984	return ret;
985}
986
987static int f2fs_ioc_start_volatile_write(struct file *filp)
988{
989	struct inode *inode = file_inode(filp);
990
991	if (!inode_owner_or_capable(inode))
992		return -EACCES;
993
994	if (f2fs_is_volatile_file(inode))
995		return 0;
996
997	set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
998
999	return f2fs_convert_inline_inode(inode);
1000}
1001
1002static int f2fs_ioc_release_volatile_write(struct file *filp)
1003{
1004	struct inode *inode = file_inode(filp);
1005
1006	if (!inode_owner_or_capable(inode))
1007		return -EACCES;
1008
1009	if (!f2fs_is_volatile_file(inode))
1010		return 0;
1011
1012	if (!f2fs_is_first_block_written(inode))
1013		return truncate_partial_data_page(inode, 0, true);
1014
1015	punch_hole(inode, 0, F2FS_BLKSIZE);
1016	return 0;
1017}
1018
1019static int f2fs_ioc_abort_volatile_write(struct file *filp)
1020{
1021	struct inode *inode = file_inode(filp);
1022	int ret;
1023
1024	if (!inode_owner_or_capable(inode))
1025		return -EACCES;
1026
1027	ret = mnt_want_write_file(filp);
1028	if (ret)
1029		return ret;
1030
1031	f2fs_balance_fs(F2FS_I_SB(inode));
1032
1033	if (f2fs_is_atomic_file(inode)) {
1034		commit_inmem_pages(inode, false);
1035		clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
1036	}
1037
1038	if (f2fs_is_volatile_file(inode)) {
1039		clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
1040		filemap_fdatawrite(inode->i_mapping);
1041		set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
1042	}
1043	mnt_drop_write_file(filp);
1044	return ret;
1045}
1046
1047static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
1048{
1049	struct inode *inode = file_inode(filp);
1050	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1051	struct super_block *sb = sbi->sb;
1052	__u32 in;
1053
1054	if (!capable(CAP_SYS_ADMIN))
1055		return -EPERM;
1056
1057	if (get_user(in, (__u32 __user *)arg))
1058		return -EFAULT;
1059
1060	switch (in) {
1061	case F2FS_GOING_DOWN_FULLSYNC:
1062		sb = freeze_bdev(sb->s_bdev);
1063		if (sb && !IS_ERR(sb)) {
1064			f2fs_stop_checkpoint(sbi);
1065			thaw_bdev(sb->s_bdev, sb);
1066		}
1067		break;
1068	case F2FS_GOING_DOWN_METASYNC:
1069		/* do checkpoint only */
1070		f2fs_sync_fs(sb, 1);
1071		f2fs_stop_checkpoint(sbi);
1072		break;
1073	case F2FS_GOING_DOWN_NOSYNC:
1074		f2fs_stop_checkpoint(sbi);
1075		break;
1076	default:
1077		return -EINVAL;
1078	}
1079	return 0;
1080}
1081
1082static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
1083{
1084	struct inode *inode = file_inode(filp);
1085	struct super_block *sb = inode->i_sb;
1086	struct request_queue *q = bdev_get_queue(sb->s_bdev);
1087	struct fstrim_range range;
1088	int ret;
1089
1090	if (!capable(CAP_SYS_ADMIN))
1091		return -EPERM;
1092
1093	if (!blk_queue_discard(q))
1094		return -EOPNOTSUPP;
1095
1096	if (copy_from_user(&range, (struct fstrim_range __user *)arg,
1097				sizeof(range)))
1098		return -EFAULT;
1099
1100	range.minlen = max((unsigned int)range.minlen,
1101				q->limits.discard_granularity);
1102	ret = f2fs_trim_fs(F2FS_SB(sb), &range);
1103	if (ret < 0)
1104		return ret;
1105
1106	if (copy_to_user((struct fstrim_range __user *)arg, &range,
1107				sizeof(range)))
1108		return -EFAULT;
1109	return 0;
1110}
1111
1112long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1113{
1114	switch (cmd) {
1115	case F2FS_IOC_GETFLAGS:
1116		return f2fs_ioc_getflags(filp, arg);
1117	case F2FS_IOC_SETFLAGS:
1118		return f2fs_ioc_setflags(filp, arg);
1119	case F2FS_IOC_GETVERSION:
1120		return f2fs_ioc_getversion(filp, arg);
1121	case F2FS_IOC_START_ATOMIC_WRITE:
1122		return f2fs_ioc_start_atomic_write(filp);
1123	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
1124		return f2fs_ioc_commit_atomic_write(filp);
1125	case F2FS_IOC_START_VOLATILE_WRITE:
1126		return f2fs_ioc_start_volatile_write(filp);
1127	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
1128		return f2fs_ioc_release_volatile_write(filp);
1129	case F2FS_IOC_ABORT_VOLATILE_WRITE:
1130		return f2fs_ioc_abort_volatile_write(filp);
1131	case F2FS_IOC_SHUTDOWN:
1132		return f2fs_ioc_shutdown(filp, arg);
1133	case FITRIM:
1134		return f2fs_ioc_fitrim(filp, arg);
1135	default:
1136		return -ENOTTY;
1137	}
1138}
1139
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: translate the 32-bit GETFLAGS/SETFLAGS command
 * numbers to their native counterparts and forward to f2fs_ioctl() with the
 * argument converted by compat_ptr().  Every other command returns
 * -ENOIOCTLCMD.
 */
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	if (cmd == F2FS_IOC32_GETFLAGS)
		cmd = F2FS_IOC_GETFLAGS;
	else if (cmd == F2FS_IOC32_SETFLAGS)
		cmd = F2FS_IOC_SETFLAGS;
	else
		return -ENOIOCTLCMD;

	return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
1156
/*
 * File operations for f2fs files.  Reads, writes, open and splice use the
 * generic VFS helpers; seeking, mmap, fsync, fallocate, release and ioctl
 * are f2fs specific.  The compat ioctl hook exists only with CONFIG_COMPAT.
 */
const struct file_operations f2fs_file_operations = {
	.llseek		= f2fs_llseek,
	.read_iter	= generic_file_read_iter,
	.write_iter	= generic_file_write_iter,
	.open		= generic_file_open,
	.release	= f2fs_release_file,
	.mmap		= f2fs_file_mmap,
	.fsync		= f2fs_sync_file,
	.fallocate	= f2fs_fallocate,
	.unlocked_ioctl	= f2fs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= f2fs_compat_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
};
1173