1/*
2 * mm/fadvise.c
3 *
4 * Copyright (C) 2002, Linus Torvalds
5 *
6 * 11Jan2003	Andrew Morton
7 *		Initial version.
8 */
9
10#include <linux/kernel.h>
11#include <linux/file.h>
12#include <linux/fs.h>
13#include <linux/mm.h>
14#include <linux/pagemap.h>
15#include <linux/backing-dev.h>
16#include <linux/pagevec.h>
17#include <linux/fadvise.h>
18#include <linux/writeback.h>
19#include <linux/syscalls.h>
20#include <linux/swap.h>
21
22#include <asm/unistd.h>
23
24/*
25 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
26 * deactivate the pages and clear PG_Referenced.
27 */
28SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
29{
30	struct fd f = fdget(fd);
31	struct inode *inode;
32	struct address_space *mapping;
33	struct backing_dev_info *bdi;
34	loff_t endbyte;			/* inclusive */
35	pgoff_t start_index;
36	pgoff_t end_index;
37	unsigned long nrpages;
38	int ret = 0;
39
40	if (!f.file)
41		return -EBADF;
42
43	inode = file_inode(f.file);
44	if (S_ISFIFO(inode->i_mode)) {
45		ret = -ESPIPE;
46		goto out;
47	}
48
49	mapping = f.file->f_mapping;
50	if (!mapping || len < 0) {
51		ret = -EINVAL;
52		goto out;
53	}
54
55	if (IS_DAX(inode)) {
56		switch (advice) {
57		case POSIX_FADV_NORMAL:
58		case POSIX_FADV_RANDOM:
59		case POSIX_FADV_SEQUENTIAL:
60		case POSIX_FADV_WILLNEED:
61		case POSIX_FADV_NOREUSE:
62		case POSIX_FADV_DONTNEED:
63			/* no bad return value, but ignore advice */
64			break;
65		default:
66			ret = -EINVAL;
67		}
68		goto out;
69	}
70
71	/* Careful about overflows. Len == 0 means "as much as possible" */
72	endbyte = offset + len;
73	if (!len || endbyte < len)
74		endbyte = -1;
75	else
76		endbyte--;		/* inclusive */
77
78	bdi = inode_to_bdi(mapping->host);
79
80	switch (advice) {
81	case POSIX_FADV_NORMAL:
82		f.file->f_ra.ra_pages = bdi->ra_pages;
83		spin_lock(&f.file->f_lock);
84		f.file->f_mode &= ~FMODE_RANDOM;
85		spin_unlock(&f.file->f_lock);
86		break;
87	case POSIX_FADV_RANDOM:
88		spin_lock(&f.file->f_lock);
89		f.file->f_mode |= FMODE_RANDOM;
90		spin_unlock(&f.file->f_lock);
91		break;
92	case POSIX_FADV_SEQUENTIAL:
93		f.file->f_ra.ra_pages = bdi->ra_pages * 2;
94		spin_lock(&f.file->f_lock);
95		f.file->f_mode &= ~FMODE_RANDOM;
96		spin_unlock(&f.file->f_lock);
97		break;
98	case POSIX_FADV_WILLNEED:
99		/* First and last PARTIAL page! */
100		start_index = offset >> PAGE_CACHE_SHIFT;
101		end_index = endbyte >> PAGE_CACHE_SHIFT;
102
103		/* Careful about overflow on the "+1" */
104		nrpages = end_index - start_index + 1;
105		if (!nrpages)
106			nrpages = ~0UL;
107
108		/*
109		 * Ignore return value because fadvise() shall return
110		 * success even if filesystem can't retrieve a hint,
111		 */
112		force_page_cache_readahead(mapping, f.file, start_index,
113					   nrpages);
114		break;
115	case POSIX_FADV_NOREUSE:
116		break;
117	case POSIX_FADV_DONTNEED:
118		if (!inode_write_congested(mapping->host))
119			__filemap_fdatawrite_range(mapping, offset, endbyte,
120						   WB_SYNC_NONE);
121
122		/*
123		 * First and last FULL page! Partial pages are deliberately
124		 * preserved on the expectation that it is better to preserve
125		 * needed memory than to discard unneeded memory.
126		 */
127		start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT;
128		end_index = (endbyte >> PAGE_CACHE_SHIFT);
129
130		if (end_index >= start_index) {
131			unsigned long count = invalidate_mapping_pages(mapping,
132						start_index, end_index);
133
134			/*
135			 * If fewer pages were invalidated than expected then
136			 * it is possible that some of the pages were on
137			 * a per-cpu pagevec for a remote CPU. Drain all
138			 * pagevecs and try again.
139			 */
140			if (count < (end_index - start_index + 1)) {
141				lru_add_drain_all();
142				invalidate_mapping_pages(mapping, start_index,
143						end_index);
144			}
145		}
146		break;
147	default:
148		ret = -EINVAL;
149	}
150out:
151	fdput(f);
152	return ret;
153}
154
155#ifdef __ARCH_WANT_SYS_FADVISE64
156
157SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
158{
159	return sys_fadvise64_64(fd, offset, len, advice);
160}
161
162#endif
163