This source file includes the following definitions (a brief user-space usage sketch of the copy path follows the list):
- mcopy_atomic_pte
- mfill_zeropage_pte
- mm_alloc_pmd
- __mcopy_atomic_hugetlb
- mfill_atomic_pte
- __mcopy_atomic
- mcopy_atomic
- mfill_zeropage
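
mcopy_atomic() and mfill_zeropage() are the kernel backends of the userfaultfd(2) UFFDIO_COPY and UFFDIO_ZEROPAGE ioctls. As a rough, non-authoritative illustration of how user space reaches these paths, here is a minimal sketch of resolving a page fault with UFFDIO_COPY; the names uffd, fault_addr, src_buf and page_size are hypothetical and assumed to be set up by the caller (userfaultfd() creation and UFFDIO_REGISTER already done).

#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

/*
 * Illustrative user-space sketch, not part of this file: copy one page of
 * prepared data into the registered range so the faulting thread can resume.
 * uffd, fault_addr, src_buf and page_size are assumed to exist.
 */
static int resolve_fault_with_copy(int uffd, unsigned long fault_addr,
				   void *src_buf, unsigned long page_size)
{
	struct uffdio_copy copy = {
		.dst  = fault_addr & ~(page_size - 1),	/* page-aligned destination */
		.src  = (unsigned long) src_buf,	/* source in our address space */
		.len  = page_size,
		.mode = 0,				/* 0 = wake the faulting thread */
	};

	/* The kernel side of this ioctl ends up in mcopy_atomic() below. */
	if (ioctl(uffd, UFFDIO_COPY, &copy) == -1)
		return -1;	/* e.g. errno == EAGAIN if the mappings changed */
	return 0;
}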
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/userfaultfd_k.h>
#include <linux/mmu_notifier.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <asm/tlbflush.h>
#include "internal.h"

static int mcopy_atomic_pte(struct mm_struct *dst_mm,
			    pmd_t *dst_pmd,
			    struct vm_area_struct *dst_vma,
			    unsigned long dst_addr,
			    unsigned long src_addr,
			    struct page **pagep)
{
	struct mem_cgroup *memcg;
	pte_t _dst_pte, *dst_pte;
	spinlock_t *ptl;
	void *page_kaddr;
	int ret;
	struct page *page;
	pgoff_t offset, max_off;
	struct inode *inode;

	if (!*pagep) {
		ret = -ENOMEM;
		page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
		if (!page)
			goto out;

		page_kaddr = kmap_atomic(page);
		ret = copy_from_user(page_kaddr,
				     (const void __user *) src_addr,
				     PAGE_SIZE);
		kunmap_atomic(page_kaddr);

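		/* fallback to copy_from_user outside mmap_sem */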
		if (unlikely(ret)) {
			ret = -ENOENT;
			*pagep = page;
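			/* don't free the page; the caller retries the copy with it */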
			goto out;
		}
	} else {
		page = *pagep;
		*pagep = NULL;
	}

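	/*
	 * The memory barrier inside __SetPageUptodate makes sure that
	 * preceding stores to the page contents become visible before
	 * the set_pte_at() write.
	 */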
	__SetPageUptodate(page);

	ret = -ENOMEM;
	if (mem_cgroup_try_charge(page, dst_mm, GFP_KERNEL, &memcg, false))
		goto out_release;

	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
	if (dst_vma->vm_flags & VM_WRITE)
		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));

	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (dst_vma->vm_file) {
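		/* the shmem MAP_PRIVATE case requires checking the i_size */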
		inode = dst_vma->vm_file->f_inode;
		offset = linear_page_index(dst_vma, dst_addr);
		max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
		ret = -EFAULT;
		if (unlikely(offset >= max_off))
			goto out_release_uncharge_unlock;
	}
	ret = -EEXIST;
	if (!pte_none(*dst_pte))
		goto out_release_uncharge_unlock;

	inc_mm_counter(dst_mm, MM_ANONPAGES);
	page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
	mem_cgroup_commit_charge(page, memcg, false, false);
	lru_cache_add_active_or_unevictable(page, dst_vma);

	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

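	/* No need to invalidate - it was non-present before */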
	update_mmu_cache(dst_vma, dst_addr, dst_pte);

	pte_unmap_unlock(dst_pte, ptl);
	ret = 0;
out:
	return ret;
out_release_uncharge_unlock:
	pte_unmap_unlock(dst_pte, ptl);
	mem_cgroup_cancel_charge(page, memcg, false);
out_release:
	put_page(page);
	goto out;
}

static int mfill_zeropage_pte(struct mm_struct *dst_mm,
			      pmd_t *dst_pmd,
			      struct vm_area_struct *dst_vma,
			      unsigned long dst_addr)
{
	pte_t _dst_pte, *dst_pte;
	spinlock_t *ptl;
	int ret;
	pgoff_t offset, max_off;
	struct inode *inode;

	_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
					 dst_vma->vm_page_prot));
	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (dst_vma->vm_file) {
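		/* the shmem MAP_PRIVATE case requires checking the i_size */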
		inode = dst_vma->vm_file->f_inode;
		offset = linear_page_index(dst_vma, dst_addr);
		max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
		ret = -EFAULT;
		if (unlikely(offset >= max_off))
			goto out_unlock;
	}
	ret = -EEXIST;
	if (!pte_none(*dst_pte))
		goto out_unlock;
	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
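	/* No need to invalidate - it was non-present before */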
	update_mmu_cache(dst_vma, dst_addr, dst_pte);
	ret = 0;
out_unlock:
	pte_unmap_unlock(dst_pte, ptl);
	return ret;
}

static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset(mm, address);
	p4d = p4d_alloc(mm, pgd, address);
	if (!p4d)
		return NULL;
	pud = pud_alloc(mm, p4d, address);
	if (!pud)
		return NULL;
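	/*
	 * Note that we didn't run this because the pmd was
	 * missing, the *pmd may be already established and in
	 * turn it may also be a trans_huge_pmd.
	 */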
	return pmd_alloc(mm, pud, address);
}

#ifdef CONFIG_HUGETLB_PAGE
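/*
 * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine is
 * called with mmap_sem held; it releases mmap_sem before returning.
 */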
static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
					      struct vm_area_struct *dst_vma,
					      unsigned long dst_start,
					      unsigned long src_start,
					      unsigned long len,
					      bool zeropage)
{
	int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED;
	int vm_shared = dst_vma->vm_flags & VM_SHARED;
	ssize_t err;
	pte_t *dst_pte;
	unsigned long src_addr, dst_addr;
	long copied;
	struct page *page;
	struct hstate *h;
	unsigned long vma_hpagesize;
	pgoff_t idx;
	u32 hash;
	struct address_space *mapping;

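	/*
	 * There is no default zero huge page for all huge page sizes as
	 * supported by hugetlb.  A PMD_SIZE huge page may exist as used
	 * by THP.  Since we can not reliably insert a zero page, this
	 * feature is not supported.
	 */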
	if (zeropage) {
		up_read(&dst_mm->mmap_sem);
		return -EINVAL;
	}

	src_addr = src_start;
	dst_addr = dst_start;
	copied = 0;
	page = NULL;
	vma_hpagesize = vma_kernel_pagesize(dst_vma);

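	/*
	 * Validate alignment based on huge page size
	 */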
	err = -EINVAL;
	if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
		goto out_unlock;

retry:
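	/*
	 * On routine entry dst_vma is set.  If we had to drop mmap_sem and
	 * retry, dst_vma will be NULL and we must lookup again.
	 */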
	if (!dst_vma) {
		err = -ENOENT;
		dst_vma = find_vma(dst_mm, dst_start);
		if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
			goto out_unlock;

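		/*
		 * Only allow copying into userfaultfd registered ranges:
		 * without a registered ctx there is nothing to serve here.
		 */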
		if (!dst_vma->vm_userfaultfd_ctx.ctx)
			goto out_unlock;

		if (dst_start < dst_vma->vm_start ||
		    dst_start + len > dst_vma->vm_end)
			goto out_unlock;

		err = -EINVAL;
		if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
			goto out_unlock;

		vm_shared = dst_vma->vm_flags & VM_SHARED;
	}

	if (WARN_ON(dst_addr & (vma_hpagesize - 1) ||
		    (len - copied) & (vma_hpagesize - 1)))
		goto out_unlock;

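	/*
	 * If not shared, ensure the dst_vma has an anon_vma.
	 */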
	err = -ENOMEM;
	if (!vm_shared) {
		if (unlikely(anon_vma_prepare(dst_vma)))
			goto out_unlock;
	}

	h = hstate_vma(dst_vma);

	while (src_addr < src_start + len) {
		pte_t dst_pteval;

		BUG_ON(dst_addr >= dst_start + len);
		VM_BUG_ON(dst_addr & ~huge_page_mask(h));

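		/*
		 * Serialize via hugetlb_fault_mutex
		 */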
		idx = linear_page_index(dst_vma, dst_addr);
		mapping = dst_vma->vm_file->f_mapping;
		hash = hugetlb_fault_mutex_hash(h, mapping, idx, dst_addr);
		mutex_lock(&hugetlb_fault_mutex_table[hash]);

		err = -ENOMEM;
		dst_pte = huge_pte_alloc(dst_mm, dst_addr, huge_page_size(h));
		if (!dst_pte) {
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			goto out_unlock;
		}

		err = -EEXIST;
		dst_pteval = huge_ptep_get(dst_pte);
		if (!huge_pte_none(dst_pteval)) {
			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
			goto out_unlock;
		}

		err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
						dst_addr, src_addr, &page);

		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
		vm_alloc_shared = vm_shared;

		cond_resched();

		if (unlikely(err == -ENOENT)) {
			up_read(&dst_mm->mmap_sem);
			BUG_ON(!page);

			err = copy_huge_page_from_user(page,
						(const void __user *)src_addr,
						pages_per_huge_page(h), true);
			if (unlikely(err)) {
				err = -EFAULT;
				goto out;
			}
			down_read(&dst_mm->mmap_sem);

			dst_vma = NULL;
			goto retry;
		} else
			BUG_ON(page);

		if (!err) {
			dst_addr += vma_hpagesize;
			src_addr += vma_hpagesize;
			copied += vma_hpagesize;

			if (fatal_signal_pending(current))
				err = -EINTR;
		}
		if (err)
			break;
	}

out_unlock:
	up_read(&dst_mm->mmap_sem);
out:
	if (page) {
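		/*
		 * An error occurred and we still hold a freshly allocated
		 * huge page that was never mapped.  Adjust PagePrivate
		 * before put_page() so that the hugetlb reservation
		 * accounting in the free path matches the way the page was
		 * originally allocated: vm_alloc_shared records whether the
		 * vma the page was allocated for was VM_SHARED (dst_vma may
		 * have changed or be NULL by now).
		 */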
		if (vm_alloc_shared)
			SetPagePrivate(page);
		else
			ClearPagePrivate(page);
		put_page(page);
	}
	BUG_ON(copied < 0);
	BUG_ON(err > 0);
	BUG_ON(!copied && !err);
	return copied ? copied : err;
}
#else
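/* fail at build time if gcc attempts to use this */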
extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
				      struct vm_area_struct *dst_vma,
				      unsigned long dst_start,
				      unsigned long src_start,
				      unsigned long len,
				      bool zeropage);
#endif

static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
						pmd_t *dst_pmd,
						struct vm_area_struct *dst_vma,
						unsigned long dst_addr,
						unsigned long src_addr,
						struct page **page,
						bool zeropage)
{
	ssize_t err;

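	/*
	 * The normal page fault path for a shmem will invoke the
	 * fault, fill the hole in the file and COW it right away. The
	 * result generates plain anonymous memory. So when we are
	 * asked to fill an hole in a MAP_PRIVATE shmem mapping, we'll
	 * generate anonymous memory directly without actually filling
	 * the hole. For the MAP_PRIVATE case the robustness check
	 * only happens in the pagetable (to verify it's still none)
	 * and not in the radix tree.
	 */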
	if (!(dst_vma->vm_flags & VM_SHARED)) {
		if (!zeropage)
			err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
					       dst_addr, src_addr, page);
		else
			err = mfill_zeropage_pte(dst_mm, dst_pmd,
						 dst_vma, dst_addr);
	} else {
		if (!zeropage)
			err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd,
						     dst_vma, dst_addr,
						     src_addr, page);
		else
			err = shmem_mfill_zeropage_pte(dst_mm, dst_pmd,
						       dst_vma, dst_addr);
	}

	return err;
}

static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
					      unsigned long dst_start,
					      unsigned long src_start,
					      unsigned long len,
					      bool zeropage,
					      bool *mmap_changing)
{
	struct vm_area_struct *dst_vma;
	ssize_t err;
	pmd_t *dst_pmd;
	unsigned long src_addr, dst_addr;
	long copied;
	struct page *page;

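	/*
	 * Sanitize the command parameters:
	 */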
	BUG_ON(dst_start & ~PAGE_MASK);
	BUG_ON(len & ~PAGE_MASK);

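	/* Does the address range wrap, or is the span zero-sized? */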
	BUG_ON(src_start + len <= src_start);
	BUG_ON(dst_start + len <= dst_start);

	src_addr = src_start;
	dst_addr = dst_start;
	copied = 0;
	page = NULL;
retry:
	down_read(&dst_mm->mmap_sem);

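	/*
	 * If memory mappings are changing because of non-cooperative
	 * operation (e.g. mremap) running in parallel, bail out and
	 * request the user to retry later
	 */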
	err = -EAGAIN;
	if (mmap_changing && READ_ONCE(*mmap_changing))
		goto out_unlock;

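	/*
	 * Make sure the dst range is both valid and fully within a
	 * single existing vma.
	 */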
	err = -ENOENT;
	dst_vma = find_vma(dst_mm, dst_start);
	if (!dst_vma)
		goto out_unlock;

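	/*
	 * Be strict and only allow __mcopy_atomic on userfaultfd
	 * registered ranges to prevent userland errors going
	 * unnoticed. As far as the VM consistency is concerned, it
	 * would be perfectly safe to remove this check, but there's
	 * no useful information to tell the caller instead of ENOENT.
	 */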
	if (!dst_vma->vm_userfaultfd_ctx.ctx)
		goto out_unlock;

	if (dst_start < dst_vma->vm_start ||
	    dst_start + len > dst_vma->vm_end)
		goto out_unlock;

	err = -EINVAL;
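	/*
	 * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
	 * it will overwrite vm_ops, so vma_is_anonymous must return false.
	 */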
	if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
	    dst_vma->vm_flags & VM_SHARED))
		goto out_unlock;

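	/*
	 * If this is a HUGETLB vma, pass off to appropriate routine
	 */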
	if (is_vm_hugetlb_page(dst_vma))
		return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
					      src_start, len, zeropage);

	if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
		goto out_unlock;

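	/*
	 * Ensure the dst_vma has an anon_vma or this page
	 * would get a NULL anon_vma when moved in the
	 * dst_vma.
	 */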
	err = -ENOMEM;
	if (!(dst_vma->vm_flags & VM_SHARED) &&
	    unlikely(anon_vma_prepare(dst_vma)))
		goto out_unlock;

	while (src_addr < src_start + len) {
		pmd_t dst_pmdval;

		BUG_ON(dst_addr >= dst_start + len);

		dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
		if (unlikely(!dst_pmd)) {
			err = -ENOMEM;
			break;
		}

		dst_pmdval = pmd_read_atomic(dst_pmd);
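		/*
		 * If the dst_pmd is mapped as THP don't
		 * override it and just be strict.
		 */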
		if (unlikely(pmd_trans_huge(dst_pmdval))) {
			err = -EEXIST;
			break;
		}
		if (unlikely(pmd_none(dst_pmdval)) &&
		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
			err = -ENOMEM;
			break;
		}
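		/* If a huge pmd materialized from under us, fail */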
		if (unlikely(pmd_trans_huge(*dst_pmd))) {
			err = -EFAULT;
			break;
		}

		BUG_ON(pmd_none(*dst_pmd));
		BUG_ON(pmd_trans_huge(*dst_pmd));

		err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
				       src_addr, &page, zeropage);
		cond_resched();

		if (unlikely(err == -ENOENT)) {
			void *page_kaddr;

			up_read(&dst_mm->mmap_sem);
			BUG_ON(!page);

			page_kaddr = kmap(page);
			err = copy_from_user(page_kaddr,
					     (const void __user *) src_addr,
					     PAGE_SIZE);
			kunmap(page);
			if (unlikely(err)) {
				err = -EFAULT;
				goto out;
			}
			goto retry;
		} else
			BUG_ON(page);

		if (!err) {
			dst_addr += PAGE_SIZE;
			src_addr += PAGE_SIZE;
			copied += PAGE_SIZE;

			if (fatal_signal_pending(current))
				err = -EINTR;
		}
		if (err)
			break;
	}

out_unlock:
	up_read(&dst_mm->mmap_sem);
out:
	if (page)
		put_page(page);
	BUG_ON(copied < 0);
	BUG_ON(err > 0);
	BUG_ON(!copied && !err);
	return copied ? copied : err;
}

ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
		     unsigned long src_start, unsigned long len,
		     bool *mmap_changing)
{
	return __mcopy_atomic(dst_mm, dst_start, src_start, len, false,
			      mmap_changing);
}

ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
		       unsigned long len, bool *mmap_changing)
{
	return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing);
}