This source file includes the following definitions.
- memfd_tag_pins
- memfd_wait_for_pins
- memfd_file_seals_ptr
- memfd_add_seals
- memfd_get_seals
- memfd_fcntl
- SYSCALL_DEFINE2
1
2
3
4
5
6
7
8
9
10 #include <linux/fs.h>
11 #include <linux/vfs.h>
12 #include <linux/pagemap.h>
13 #include <linux/file.h>
14 #include <linux/mm.h>
15 #include <linux/sched/signal.h>
16 #include <linux/khugepaged.h>
17 #include <linux/syscalls.h>
18 #include <linux/hugetlb.h>
19 #include <linux/shmem_fs.h>
20 #include <linux/memfd.h>
21 #include <uapi/linux/memfd.h>
22
23
24
25
26
27
28 #define MEMFD_TAG_PINNED PAGECACHE_TAG_TOWRITE
29 #define LAST_SCAN 4
30
31 static void memfd_tag_pins(struct xa_state *xas)
32 {
33 struct page *page;
34 unsigned int tagged = 0;
35
36 lru_add_drain();
37
38 xas_lock_irq(xas);
39 xas_for_each(xas, page, ULONG_MAX) {
40 if (xa_is_value(page))
41 continue;
42 page = find_subpage(page, xas->xa_index);
43 if (page_count(page) - page_mapcount(page) > 1)
44 xas_set_mark(xas, MEMFD_TAG_PINNED);
45
46 if (++tagged % XA_CHECK_SCHED)
47 continue;
48
49 xas_pause(xas);
50 xas_unlock_irq(xas);
51 cond_resched();
52 xas_lock_irq(xas);
53 }
54 xas_unlock_irq(xas);
55 }
56
57
58
59
60
61
62
63
64
65
66 static int memfd_wait_for_pins(struct address_space *mapping)
67 {
68 XA_STATE(xas, &mapping->i_pages, 0);
69 struct page *page;
70 int error, scan;
71
72 memfd_tag_pins(&xas);
73
74 error = 0;
75 for (scan = 0; scan <= LAST_SCAN; scan++) {
76 unsigned int tagged = 0;
77
78 if (!xas_marked(&xas, MEMFD_TAG_PINNED))
79 break;
80
81 if (!scan)
82 lru_add_drain_all();
83 else if (schedule_timeout_killable((HZ << scan) / 200))
84 scan = LAST_SCAN;
85
86 xas_set(&xas, 0);
87 xas_lock_irq(&xas);
88 xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
89 bool clear = true;
90 if (xa_is_value(page))
91 continue;
92 page = find_subpage(page, xas.xa_index);
93 if (page_count(page) - page_mapcount(page) != 1) {
94
95
96
97
98
99 if (scan == LAST_SCAN)
100 error = -EBUSY;
101 else
102 clear = false;
103 }
104 if (clear)
105 xas_clear_mark(&xas, MEMFD_TAG_PINNED);
106 if (++tagged % XA_CHECK_SCHED)
107 continue;
108
109 xas_pause(&xas);
110 xas_unlock_irq(&xas);
111 cond_resched();
112 xas_lock_irq(&xas);
113 }
114 xas_unlock_irq(&xas);
115 }
116
117 return error;
118 }
119
120 static unsigned int *memfd_file_seals_ptr(struct file *file)
121 {
122 if (shmem_file(file))
123 return &SHMEM_I(file_inode(file))->seals;
124
125 #ifdef CONFIG_HUGETLBFS
126 if (is_file_hugepages(file))
127 return &HUGETLBFS_I(file_inode(file))->seals;
128 #endif
129
130 return NULL;
131 }
132
133 #define F_ALL_SEALS (F_SEAL_SEAL | \
134 F_SEAL_SHRINK | \
135 F_SEAL_GROW | \
136 F_SEAL_WRITE | \
137 F_SEAL_FUTURE_WRITE)
138
139 static int memfd_add_seals(struct file *file, unsigned int seals)
140 {
141 struct inode *inode = file_inode(file);
142 unsigned int *file_seals;
143 int error;
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176 if (!(file->f_mode & FMODE_WRITE))
177 return -EPERM;
178 if (seals & ~(unsigned int)F_ALL_SEALS)
179 return -EINVAL;
180
181 inode_lock(inode);
182
183 file_seals = memfd_file_seals_ptr(file);
184 if (!file_seals) {
185 error = -EINVAL;
186 goto unlock;
187 }
188
189 if (*file_seals & F_SEAL_SEAL) {
190 error = -EPERM;
191 goto unlock;
192 }
193
194 if ((seals & F_SEAL_WRITE) && !(*file_seals & F_SEAL_WRITE)) {
195 error = mapping_deny_writable(file->f_mapping);
196 if (error)
197 goto unlock;
198
199 error = memfd_wait_for_pins(file->f_mapping);
200 if (error) {
201 mapping_allow_writable(file->f_mapping);
202 goto unlock;
203 }
204 }
205
206 *file_seals |= seals;
207 error = 0;
208
209 unlock:
210 inode_unlock(inode);
211 return error;
212 }
213
214 static int memfd_get_seals(struct file *file)
215 {
216 unsigned int *seals = memfd_file_seals_ptr(file);
217
218 return seals ? *seals : -EINVAL;
219 }
220
221 long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
222 {
223 long error;
224
225 switch (cmd) {
226 case F_ADD_SEALS:
227
228 if (arg > UINT_MAX)
229 return -EINVAL;
230
231 error = memfd_add_seals(file, arg);
232 break;
233 case F_GET_SEALS:
234 error = memfd_get_seals(file);
235 break;
236 default:
237 error = -EINVAL;
238 break;
239 }
240
241 return error;
242 }
243
244 #define MFD_NAME_PREFIX "memfd:"
245 #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
246 #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
247
248 #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB)
249
250 SYSCALL_DEFINE2(memfd_create,
251 const char __user *, uname,
252 unsigned int, flags)
253 {
254 unsigned int *file_seals;
255 struct file *file;
256 int fd, error;
257 char *name;
258 long len;
259
260 if (!(flags & MFD_HUGETLB)) {
261 if (flags & ~(unsigned int)MFD_ALL_FLAGS)
262 return -EINVAL;
263 } else {
264
265 if (flags & ~(unsigned int)(MFD_ALL_FLAGS |
266 (MFD_HUGE_MASK << MFD_HUGE_SHIFT)))
267 return -EINVAL;
268 }
269
270
271 len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
272 if (len <= 0)
273 return -EFAULT;
274 if (len > MFD_NAME_MAX_LEN + 1)
275 return -EINVAL;
276
277 name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_KERNEL);
278 if (!name)
279 return -ENOMEM;
280
281 strcpy(name, MFD_NAME_PREFIX);
282 if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
283 error = -EFAULT;
284 goto err_name;
285 }
286
287
288 if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
289 error = -EFAULT;
290 goto err_name;
291 }
292
293 fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
294 if (fd < 0) {
295 error = fd;
296 goto err_name;
297 }
298
299 if (flags & MFD_HUGETLB) {
300 struct user_struct *user = NULL;
301
302 file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user,
303 HUGETLB_ANONHUGE_INODE,
304 (flags >> MFD_HUGE_SHIFT) &
305 MFD_HUGE_MASK);
306 } else
307 file = shmem_file_setup(name, 0, VM_NORESERVE);
308 if (IS_ERR(file)) {
309 error = PTR_ERR(file);
310 goto err_fd;
311 }
312 file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
313 file->f_flags |= O_LARGEFILE;
314
315 if (flags & MFD_ALLOW_SEALING) {
316 file_seals = memfd_file_seals_ptr(file);
317 *file_seals &= ~F_SEAL_SEAL;
318 }
319
320 fd_install(fd, file);
321 kfree(name);
322 return fd;
323
324 err_fd:
325 put_unused_fd(fd);
326 err_name:
327 kfree(name);
328 return error;
329 }