This source file includes the following definitions.
- put_user_pages_dirty_lock
- put_user_pages
- no_page_table
- follow_pfn_pte
- can_follow_write_pte
- should_force_cow_break
- follow_page_pte
- follow_pmd_mask
- follow_pud_mask
- follow_p4d_mask
- follow_page_mask
- follow_page
- get_gate_page
- faultin_page
- check_vma_flags
- __get_user_pages
- vma_permits_fault
- fixup_user_fault
- __get_user_pages_locked
- get_user_pages_remote
- populate_vma_page_range
- __mm_populate
- get_dump_page
- __get_user_pages_locked
- check_dax_vmas
- new_non_cma_page
- check_and_migrate_cma_pages
- check_and_migrate_cma_pages
- __gup_longterm_locked
- __gup_longterm_locked
- get_user_pages
- get_user_pages_locked
- get_user_pages_unlocked
- gup_get_pte
- gup_get_pte
- undo_dev_pagemap
- try_get_compound_head
- gup_pte_range
- gup_pte_range
- __gup_device_huge
- __gup_device_huge_pmd
- __gup_device_huge_pud
- __gup_device_huge_pmd
- __gup_device_huge_pud
- hugepte_addr_end
- gup_hugepte
- gup_huge_pd
- gup_huge_pd
- gup_huge_pmd
- gup_huge_pud
- gup_huge_pgd
- gup_pmd_range
- gup_pud_range
- gup_p4d_range
- gup_pgd_range
- gup_pgd_range
- gup_fast_permitted
- __get_user_pages_fast
- __gup_longterm_unlocked
- get_user_pages_fast
1
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/err.h>
5 #include <linux/spinlock.h>
6
7 #include <linux/mm.h>
8 #include <linux/memremap.h>
9 #include <linux/pagemap.h>
10 #include <linux/rmap.h>
11 #include <linux/swap.h>
12 #include <linux/swapops.h>
13
14 #include <linux/sched/signal.h>
15 #include <linux/rwsem.h>
16 #include <linux/hugetlb.h>
17 #include <linux/migrate.h>
18 #include <linux/mm_inline.h>
19 #include <linux/sched/mm.h>
20
21 #include <asm/mmu_context.h>
22 #include <asm/pgtable.h>
23 #include <asm/tlbflush.h>
24
25 #include "internal.h"
26
27 struct follow_page_context {
28 struct dev_pagemap *pgmap;
29 unsigned int page_mask;
30 };
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54 void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
55 bool make_dirty)
56 {
57 unsigned long index;
58
59
60
61
62
63
64
65 if (!make_dirty) {
66 put_user_pages(pages, npages);
67 return;
68 }
69
70 for (index = 0; index < npages; index++) {
71 struct page *page = compound_head(pages[index]);
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92 if (!PageDirty(page))
93 set_page_dirty_lock(page);
94 put_user_page(page);
95 }
96 }
97 EXPORT_SYMBOL(put_user_pages_dirty_lock);
98
99
100
101
102
103
104
105
106
107
108 void put_user_pages(struct page **pages, unsigned long npages)
109 {
110 unsigned long index;
111
112
113
114
115
116
117 for (index = 0; index < npages; index++)
118 put_user_page(pages[index]);
119 }
120 EXPORT_SYMBOL(put_user_pages);
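
/*
 * Illustrative usage sketch (hypothetical helper, not part of gup.c, assuming
 * <linux/mm.h> is included): a short-lived, read-only pin is released with
 * put_user_pages(); a caller that had written into the pages would use
 * put_user_pages_dirty_lock() with make_dirty == true instead, so the dirty
 * state is not lost.
 */
static int example_read_user_buffer(unsigned long uaddr, int npages,
				    struct page **pages)
{
	int pinned;

	/* Read-only pin: no FOLL_WRITE, so nothing needs dirtying later. */
	pinned = get_user_pages_fast(uaddr, npages, 0, pages);
	if (pinned <= 0)
		return pinned ? pinned : -EFAULT;

	/* ... copy data out of the pinned pages here ... */

	/* Drop the references taken by the pin. */
	put_user_pages(pages, pinned);
	return pinned;
}
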
121
122 #ifdef CONFIG_MMU
123 static struct page *no_page_table(struct vm_area_struct *vma,
124 unsigned int flags)
125 {
126
127
128
129
130
131
132
133
134 if ((flags & FOLL_DUMP) && (!vma->vm_ops || !vma->vm_ops->fault))
135 return ERR_PTR(-EFAULT);
136 return NULL;
137 }
138
139 static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
140 pte_t *pte, unsigned int flags)
141 {
142
143 if (flags & FOLL_GET)
144 return -EFAULT;
145
146 if (flags & FOLL_TOUCH) {
147 pte_t entry = *pte;
148
149 if (flags & FOLL_WRITE)
150 entry = pte_mkdirty(entry);
151 entry = pte_mkyoung(entry);
152
153 if (!pte_same(*pte, entry)) {
154 set_pte_at(vma->vm_mm, address, pte, entry);
155 update_mmu_cache(vma, address, pte);
156 }
157 }
158
159
160 return -EEXIST;
161 }
162
163
164
165
166
167 static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
168 {
169 return pte_write(pte) || ((flags & FOLL_COW) && pte_dirty(pte));
170 }
171
172
173
174
175
176
177 static inline bool should_force_cow_break(struct vm_area_struct *vma, unsigned int flags)
178 {
179 return is_cow_mapping(vma->vm_flags) && (flags & FOLL_GET);
180 }
181
182 static struct page *follow_page_pte(struct vm_area_struct *vma,
183 unsigned long address, pmd_t *pmd, unsigned int flags,
184 struct dev_pagemap **pgmap)
185 {
186 struct mm_struct *mm = vma->vm_mm;
187 struct page *page;
188 spinlock_t *ptl;
189 pte_t *ptep, pte;
190
191 retry:
192 if (unlikely(pmd_bad(*pmd)))
193 return no_page_table(vma, flags);
194
195 ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
196 pte = *ptep;
197 if (!pte_present(pte)) {
198 swp_entry_t entry;
199
200
201
202
203
204 if (likely(!(flags & FOLL_MIGRATION)))
205 goto no_page;
206 if (pte_none(pte))
207 goto no_page;
208 entry = pte_to_swp_entry(pte);
209 if (!is_migration_entry(entry))
210 goto no_page;
211 pte_unmap_unlock(ptep, ptl);
212 migration_entry_wait(mm, pmd, address);
213 goto retry;
214 }
215 if ((flags & FOLL_NUMA) && pte_protnone(pte))
216 goto no_page;
217 if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
218 pte_unmap_unlock(ptep, ptl);
219 return NULL;
220 }
221
222 page = vm_normal_page(vma, address, pte);
223 if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
224
225
226
227
228 *pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
229 if (*pgmap)
230 page = pte_page(pte);
231 else
232 goto no_page;
233 } else if (unlikely(!page)) {
234 if (flags & FOLL_DUMP) {
235
236 page = ERR_PTR(-EFAULT);
237 goto out;
238 }
239
240 if (is_zero_pfn(pte_pfn(pte))) {
241 page = pte_page(pte);
242 } else {
243 int ret;
244
245 ret = follow_pfn_pte(vma, address, ptep, flags);
246 page = ERR_PTR(ret);
247 goto out;
248 }
249 }
250
251 if (flags & FOLL_SPLIT && PageTransCompound(page)) {
252 int ret;
253 get_page(page);
254 pte_unmap_unlock(ptep, ptl);
255 lock_page(page);
256 ret = split_huge_page(page);
257 unlock_page(page);
258 put_page(page);
259 if (ret)
260 return ERR_PTR(ret);
261 goto retry;
262 }
263
264 if (flags & FOLL_GET) {
265 if (unlikely(!try_get_page(page))) {
266 page = ERR_PTR(-ENOMEM);
267 goto out;
268 }
269 }
270 if (flags & FOLL_TOUCH) {
271 if ((flags & FOLL_WRITE) &&
272 !pte_dirty(pte) && !PageDirty(page))
273 set_page_dirty(page);
274
275
276
277
278
279 mark_page_accessed(page);
280 }
281 if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
282
283 if (PageTransCompound(page))
284 goto out;
285
286
287
288
289
290
291
292
293
294
295 if (page->mapping && trylock_page(page)) {
296 lru_add_drain();
297
298
299
300
301
302
303 mlock_vma_page(page);
304 unlock_page(page);
305 }
306 }
307 out:
308 pte_unmap_unlock(ptep, ptl);
309 return page;
310 no_page:
311 pte_unmap_unlock(ptep, ptl);
312 if (!pte_none(pte))
313 return NULL;
314 return no_page_table(vma, flags);
315 }
316
317 static struct page *follow_pmd_mask(struct vm_area_struct *vma,
318 unsigned long address, pud_t *pudp,
319 unsigned int flags,
320 struct follow_page_context *ctx)
321 {
322 pmd_t *pmd, pmdval;
323 spinlock_t *ptl;
324 struct page *page;
325 struct mm_struct *mm = vma->vm_mm;
326
327 pmd = pmd_offset(pudp, address);
328
329
330
331
332 pmdval = READ_ONCE(*pmd);
333 if (pmd_none(pmdval))
334 return no_page_table(vma, flags);
335 if (pmd_huge(pmdval) && vma->vm_flags & VM_HUGETLB) {
336 page = follow_huge_pmd(mm, address, pmd, flags);
337 if (page)
338 return page;
339 return no_page_table(vma, flags);
340 }
341 if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
342 page = follow_huge_pd(vma, address,
343 __hugepd(pmd_val(pmdval)), flags,
344 PMD_SHIFT);
345 if (page)
346 return page;
347 return no_page_table(vma, flags);
348 }
349 retry:
350 if (!pmd_present(pmdval)) {
351 if (likely(!(flags & FOLL_MIGRATION)))
352 return no_page_table(vma, flags);
353 VM_BUG_ON(thp_migration_supported() &&
354 !is_pmd_migration_entry(pmdval));
355 if (is_pmd_migration_entry(pmdval))
356 pmd_migration_entry_wait(mm, pmd);
357 pmdval = READ_ONCE(*pmd);
358
359
360
361
362 if (pmd_none(pmdval))
363 return no_page_table(vma, flags);
364 goto retry;
365 }
366 if (pmd_devmap(pmdval)) {
367 ptl = pmd_lock(mm, pmd);
368 page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
369 spin_unlock(ptl);
370 if (page)
371 return page;
372 }
373 if (likely(!pmd_trans_huge(pmdval)))
374 return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
375
376 if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
377 return no_page_table(vma, flags);
378
379 retry_locked:
380 ptl = pmd_lock(mm, pmd);
381 if (unlikely(pmd_none(*pmd))) {
382 spin_unlock(ptl);
383 return no_page_table(vma, flags);
384 }
385 if (unlikely(!pmd_present(*pmd))) {
386 spin_unlock(ptl);
387 if (likely(!(flags & FOLL_MIGRATION)))
388 return no_page_table(vma, flags);
389 pmd_migration_entry_wait(mm, pmd);
390 goto retry_locked;
391 }
392 if (unlikely(!pmd_trans_huge(*pmd))) {
393 spin_unlock(ptl);
394 return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
395 }
396 if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
397 int ret;
398 page = pmd_page(*pmd);
399 if (is_huge_zero_page(page)) {
400 spin_unlock(ptl);
401 ret = 0;
402 split_huge_pmd(vma, pmd, address);
403 if (pmd_trans_unstable(pmd))
404 ret = -EBUSY;
405 } else if (flags & FOLL_SPLIT) {
406 if (unlikely(!try_get_page(page))) {
407 spin_unlock(ptl);
408 return ERR_PTR(-ENOMEM);
409 }
410 spin_unlock(ptl);
411 lock_page(page);
412 ret = split_huge_page(page);
413 unlock_page(page);
414 put_page(page);
415 if (pmd_none(*pmd))
416 return no_page_table(vma, flags);
417 } else {
418 spin_unlock(ptl);
419 split_huge_pmd(vma, pmd, address);
420 ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
421 }
422
423 return ret ? ERR_PTR(ret) :
424 follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
425 }
426 page = follow_trans_huge_pmd(vma, address, pmd, flags);
427 spin_unlock(ptl);
428 ctx->page_mask = HPAGE_PMD_NR - 1;
429 return page;
430 }
431
432 static struct page *follow_pud_mask(struct vm_area_struct *vma,
433 unsigned long address, p4d_t *p4dp,
434 unsigned int flags,
435 struct follow_page_context *ctx)
436 {
437 pud_t *pud;
438 spinlock_t *ptl;
439 struct page *page;
440 struct mm_struct *mm = vma->vm_mm;
441
442 pud = pud_offset(p4dp, address);
443 if (pud_none(*pud))
444 return no_page_table(vma, flags);
445 if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
446 page = follow_huge_pud(mm, address, pud, flags);
447 if (page)
448 return page;
449 return no_page_table(vma, flags);
450 }
451 if (is_hugepd(__hugepd(pud_val(*pud)))) {
452 page = follow_huge_pd(vma, address,
453 __hugepd(pud_val(*pud)), flags,
454 PUD_SHIFT);
455 if (page)
456 return page;
457 return no_page_table(vma, flags);
458 }
459 if (pud_devmap(*pud)) {
460 ptl = pud_lock(mm, pud);
461 page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
462 spin_unlock(ptl);
463 if (page)
464 return page;
465 }
466 if (unlikely(pud_bad(*pud)))
467 return no_page_table(vma, flags);
468
469 return follow_pmd_mask(vma, address, pud, flags, ctx);
470 }
471
472 static struct page *follow_p4d_mask(struct vm_area_struct *vma,
473 unsigned long address, pgd_t *pgdp,
474 unsigned int flags,
475 struct follow_page_context *ctx)
476 {
477 p4d_t *p4d;
478 struct page *page;
479
480 p4d = p4d_offset(pgdp, address);
481 if (p4d_none(*p4d))
482 return no_page_table(vma, flags);
483 BUILD_BUG_ON(p4d_huge(*p4d));
484 if (unlikely(p4d_bad(*p4d)))
485 return no_page_table(vma, flags);
486
487 if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
488 page = follow_huge_pd(vma, address,
489 __hugepd(p4d_val(*p4d)), flags,
490 P4D_SHIFT);
491 if (page)
492 return page;
493 return no_page_table(vma, flags);
494 }
495 return follow_pud_mask(vma, address, p4d, flags, ctx);
496 }
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517 static struct page *follow_page_mask(struct vm_area_struct *vma,
518 unsigned long address, unsigned int flags,
519 struct follow_page_context *ctx)
520 {
521 pgd_t *pgd;
522 struct page *page;
523 struct mm_struct *mm = vma->vm_mm;
524
525 ctx->page_mask = 0;
526
527
528 page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
529 if (!IS_ERR(page)) {
530 BUG_ON(flags & FOLL_GET);
531 return page;
532 }
533
534 pgd = pgd_offset(mm, address);
535
536 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
537 return no_page_table(vma, flags);
538
539 if (pgd_huge(*pgd)) {
540 page = follow_huge_pgd(mm, address, pgd, flags);
541 if (page)
542 return page;
543 return no_page_table(vma, flags);
544 }
545 if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
546 page = follow_huge_pd(vma, address,
547 __hugepd(pgd_val(*pgd)), flags,
548 PGDIR_SHIFT);
549 if (page)
550 return page;
551 return no_page_table(vma, flags);
552 }
553
554 return follow_p4d_mask(vma, address, pgd, flags, ctx);
555 }
556
557 struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
558 unsigned int foll_flags)
559 {
560 struct follow_page_context ctx = { NULL };
561 struct page *page;
562
563 page = follow_page_mask(vma, address, foll_flags, &ctx);
564 if (ctx.pgmap)
565 put_dev_pagemap(ctx.pgmap);
566 return page;
567 }
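
/*
 * Minimal sketch (hypothetical helper, not part of this file) of how
 * in-kernel callers such as mm/ksm.c use follow_page(): mmap_sem must be
 * held and the vma must cover the address; with FOLL_GET the returned page
 * carries a reference the caller later drops with put_page().
 */
static struct page *example_lookup_user_page(struct vm_area_struct *vma,
					     unsigned long addr)
{
	struct page *page;

	/* Caller holds mmap_sem for read and vma covers addr. */
	page = follow_page(vma, addr, FOLL_GET);
	if (IS_ERR_OR_NULL(page))
		return NULL;	/* not mapped, or a special/PFN mapping */

	return page;		/* release with put_page() when done */
}
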
568
569 static int get_gate_page(struct mm_struct *mm, unsigned long address,
570 unsigned int gup_flags, struct vm_area_struct **vma,
571 struct page **page)
572 {
573 pgd_t *pgd;
574 p4d_t *p4d;
575 pud_t *pud;
576 pmd_t *pmd;
577 pte_t *pte;
578 int ret = -EFAULT;
579
580
581 if (gup_flags & FOLL_WRITE)
582 return -EFAULT;
583 if (address > TASK_SIZE)
584 pgd = pgd_offset_k(address);
585 else
586 pgd = pgd_offset_gate(mm, address);
587 if (pgd_none(*pgd))
588 return -EFAULT;
589 p4d = p4d_offset(pgd, address);
590 if (p4d_none(*p4d))
591 return -EFAULT;
592 pud = pud_offset(p4d, address);
593 if (pud_none(*pud))
594 return -EFAULT;
595 pmd = pmd_offset(pud, address);
596 if (!pmd_present(*pmd))
597 return -EFAULT;
598 VM_BUG_ON(pmd_trans_huge(*pmd));
599 pte = pte_offset_map(pmd, address);
600 if (pte_none(*pte))
601 goto unmap;
602 *vma = get_gate_vma(mm);
603 if (!page)
604 goto out;
605 *page = vm_normal_page(*vma, address, *pte);
606 if (!*page) {
607 if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
608 goto unmap;
609 *page = pte_page(*pte);
610 }
611 if (unlikely(!try_get_page(*page))) {
612 ret = -ENOMEM;
613 goto unmap;
614 }
615 out:
616 ret = 0;
617 unmap:
618 pte_unmap(pte);
619 return ret;
620 }
621
622
623
624
625
626
627 static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
628 unsigned long address, unsigned int *flags, int *nonblocking)
629 {
630 unsigned int fault_flags = 0;
631 vm_fault_t ret;
632
633
634 if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
635 return -ENOENT;
636 if (*flags & FOLL_WRITE)
637 fault_flags |= FAULT_FLAG_WRITE;
638 if (*flags & FOLL_REMOTE)
639 fault_flags |= FAULT_FLAG_REMOTE;
640 if (nonblocking)
641 fault_flags |= FAULT_FLAG_ALLOW_RETRY;
642 if (*flags & FOLL_NOWAIT)
643 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
644 if (*flags & FOLL_TRIED) {
645 VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
646 fault_flags |= FAULT_FLAG_TRIED;
647 }
648
649 ret = handle_mm_fault(vma, address, fault_flags);
650 if (ret & VM_FAULT_ERROR) {
651 int err = vm_fault_to_errno(ret, *flags);
652
653 if (err)
654 return err;
655 BUG();
656 }
657
658 if (tsk) {
659 if (ret & VM_FAULT_MAJOR)
660 tsk->maj_flt++;
661 else
662 tsk->min_flt++;
663 }
664
665 if (ret & VM_FAULT_RETRY) {
666 if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
667 *nonblocking = 0;
668 return -EBUSY;
669 }
670
671
672
673
674
675
676
677
678
679
680 if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
681 *flags |= FOLL_COW;
682 return 0;
683 }
684
685 static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
686 {
687 vm_flags_t vm_flags = vma->vm_flags;
688 int write = (gup_flags & FOLL_WRITE);
689 int foreign = (gup_flags & FOLL_REMOTE);
690
691 if (vm_flags & (VM_IO | VM_PFNMAP))
692 return -EFAULT;
693
694 if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
695 return -EFAULT;
696
697 if (write) {
698 if (!(vm_flags & VM_WRITE)) {
699 if (!(gup_flags & FOLL_FORCE))
700 return -EFAULT;
701
702
703
704
705
706
707
708
709
710 if (!is_cow_mapping(vm_flags))
711 return -EFAULT;
712 }
713 } else if (!(vm_flags & VM_READ)) {
714 if (!(gup_flags & FOLL_FORCE))
715 return -EFAULT;
716
717
718
719
720 if (!(vm_flags & VM_MAYREAD))
721 return -EFAULT;
722 }
723
724
725
726
727 if (!arch_vma_access_permitted(vma, write, false, foreign))
728 return -EFAULT;
729 return 0;
730 }
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788 static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
789 unsigned long start, unsigned long nr_pages,
790 unsigned int gup_flags, struct page **pages,
791 struct vm_area_struct **vmas, int *nonblocking)
792 {
793 long ret = 0, i = 0;
794 struct vm_area_struct *vma = NULL;
795 struct follow_page_context ctx = { NULL };
796
797 if (!nr_pages)
798 return 0;
799
800 start = untagged_addr(start);
801
802 VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
803
804
805
806
807
808
809 if (!(gup_flags & FOLL_FORCE))
810 gup_flags |= FOLL_NUMA;
811
812 do {
813 struct page *page;
814 unsigned int foll_flags = gup_flags;
815 unsigned int page_increm;
816
817
818 if (!vma || start >= vma->vm_end) {
819 vma = find_extend_vma(mm, start);
820 if (!vma && in_gate_area(mm, start)) {
821 ret = get_gate_page(mm, start & PAGE_MASK,
822 gup_flags, &vma,
823 pages ? &pages[i] : NULL);
824 if (ret)
825 goto out;
826 ctx.page_mask = 0;
827 goto next_page;
828 }
829
830 if (!vma || check_vma_flags(vma, gup_flags)) {
831 ret = -EFAULT;
832 goto out;
833 }
834 if (is_vm_hugetlb_page(vma)) {
835 if (should_force_cow_break(vma, foll_flags))
836 foll_flags |= FOLL_WRITE;
837 i = follow_hugetlb_page(mm, vma, pages, vmas,
838 &start, &nr_pages, i,
839 foll_flags, nonblocking);
840 continue;
841 }
842 }
843
844 if (should_force_cow_break(vma, foll_flags))
845 foll_flags |= FOLL_WRITE;
846
847 retry:
848
849
850
851
852 if (fatal_signal_pending(current)) {
853 ret = -ERESTARTSYS;
854 goto out;
855 }
856 cond_resched();
857
858 page = follow_page_mask(vma, start, foll_flags, &ctx);
859 if (!page) {
860 ret = faultin_page(tsk, vma, start, &foll_flags,
861 nonblocking);
862 switch (ret) {
863 case 0:
864 goto retry;
865 case -EBUSY:
866 ret = 0;
867
868 case -EFAULT:
869 case -ENOMEM:
870 case -EHWPOISON:
871 goto out;
872 case -ENOENT:
873 goto next_page;
874 }
875 BUG();
876 } else if (PTR_ERR(page) == -EEXIST) {
877
878
879
880
881 goto next_page;
882 } else if (IS_ERR(page)) {
883 ret = PTR_ERR(page);
884 goto out;
885 }
886 if (pages) {
887 pages[i] = page;
888 flush_anon_page(vma, page, start);
889 flush_dcache_page(page);
890 ctx.page_mask = 0;
891 }
892 next_page:
893 if (vmas) {
894 vmas[i] = vma;
895 ctx.page_mask = 0;
896 }
897 page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
898 if (page_increm > nr_pages)
899 page_increm = nr_pages;
900 i += page_increm;
901 start += page_increm * PAGE_SIZE;
902 nr_pages -= page_increm;
903 } while (nr_pages);
904 out:
905 if (ctx.pgmap)
906 put_dev_pagemap(ctx.pgmap);
907 return i ? i : ret;
908 }
909
910 static bool vma_permits_fault(struct vm_area_struct *vma,
911 unsigned int fault_flags)
912 {
913 bool write = !!(fault_flags & FAULT_FLAG_WRITE);
914 bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
915 vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;
916
917 if (!(vm_flags & vma->vm_flags))
918 return false;
919
920
921
922
923
924
925
926
927 if (!arch_vma_access_permitted(vma, write, false, foreign))
928 return false;
929
930 return true;
931 }
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963 int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
964 unsigned long address, unsigned int fault_flags,
965 bool *unlocked)
966 {
967 struct vm_area_struct *vma;
968 vm_fault_t ret, major = 0;
969
970 address = untagged_addr(address);
971
972 if (unlocked)
973 fault_flags |= FAULT_FLAG_ALLOW_RETRY;
974
975 retry:
976 vma = find_extend_vma(mm, address);
977 if (!vma || address < vma->vm_start)
978 return -EFAULT;
979
980 if (!vma_permits_fault(vma, fault_flags))
981 return -EFAULT;
982
983 ret = handle_mm_fault(vma, address, fault_flags);
984 major |= ret & VM_FAULT_MAJOR;
985 if (ret & VM_FAULT_ERROR) {
986 int err = vm_fault_to_errno(ret, 0);
987
988 if (err)
989 return err;
990 BUG();
991 }
992
993 if (ret & VM_FAULT_RETRY) {
994 down_read(&mm->mmap_sem);
995 if (!(fault_flags & FAULT_FLAG_TRIED)) {
996 *unlocked = true;
997 fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
998 fault_flags |= FAULT_FLAG_TRIED;
999 goto retry;
1000 }
1001 }
1002
1003 if (tsk) {
1004 if (major)
1005 tsk->maj_flt++;
1006 else
1007 tsk->min_flt++;
1008 }
1009 return 0;
1010 }
1011 EXPORT_SYMBOL_GPL(fixup_user_fault);
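
/*
 * Minimal sketch (hypothetical helper, not part of this file), modelled on
 * the futex code's fault_in_user_writeable(): fault a single address in for
 * write while holding mmap_sem. Passing a non-NULL unlocked pointer instead
 * would let fixup_user_fault() retry after temporarily dropping the
 * semaphore.
 */
static int example_fault_in_writeable(unsigned long uaddr)
{
	struct mm_struct *mm = current->mm;
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, uaddr, FAULT_FLAG_WRITE, NULL);
	up_read(&mm->mmap_sem);

	return ret;
}
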
1012
1013 static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
1014 struct mm_struct *mm,
1015 unsigned long start,
1016 unsigned long nr_pages,
1017 struct page **pages,
1018 struct vm_area_struct **vmas,
1019 int *locked,
1020 unsigned int flags)
1021 {
1022 long ret, pages_done;
1023 bool lock_dropped;
1024
1025 if (locked) {
1026
1027 BUG_ON(vmas);
1028
1029 BUG_ON(*locked != 1);
1030 }
1031
1032 if (pages)
1033 flags |= FOLL_GET;
1034
1035 pages_done = 0;
1036 lock_dropped = false;
1037 for (;;) {
1038 ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
1039 vmas, locked);
1040 if (!locked)
1041
1042 return ret;
1043
1044
1045 if (!*locked) {
1046 BUG_ON(ret < 0);
1047 BUG_ON(ret >= nr_pages);
1048 }
1049
1050 if (ret > 0) {
1051 nr_pages -= ret;
1052 pages_done += ret;
1053 if (!nr_pages)
1054 break;
1055 }
1056 if (*locked) {
1057
1058
1059
1060
1061 if (!pages_done)
1062 pages_done = ret;
1063 break;
1064 }
1065
1066
1067
1068
1069 if (likely(pages))
1070 pages += ret;
1071 start += ret << PAGE_SHIFT;
1072
1073
1074
1075
1076
1077
1078 *locked = 1;
1079 lock_dropped = true;
1080 down_read(&mm->mmap_sem);
1081 ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
1082 pages, NULL, NULL);
1083 if (ret != 1) {
1084 BUG_ON(ret > 1);
1085 if (!pages_done)
1086 pages_done = ret;
1087 break;
1088 }
1089 nr_pages--;
1090 pages_done++;
1091 if (!nr_pages)
1092 break;
1093 if (likely(pages))
1094 pages++;
1095 start += PAGE_SIZE;
1096 }
1097 if (lock_dropped && *locked) {
1098
1099
1100
1101
1102 up_read(&mm->mmap_sem);
1103 *locked = 0;
1104 }
1105 return pages_done;
1106 }
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164 long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
1165 unsigned long start, unsigned long nr_pages,
1166 unsigned int gup_flags, struct page **pages,
1167 struct vm_area_struct **vmas, int *locked)
1168 {
1169
1170
1171
1172
1173
1174
1175 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
1176 return -EINVAL;
1177
1178 return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
1179 locked,
1180 gup_flags | FOLL_TOUCH | FOLL_REMOTE);
1181 }
1182 EXPORT_SYMBOL(get_user_pages_remote);
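
/*
 * Sketch (hypothetical helper, not part of this file) of the remote calling
 * convention: the caller pins pages of another mm while holding that mm's
 * mmap_sem, and must check the locked flag on return, because the locked
 * variants may drop the semaphore while waiting for a fault; when locked
 * comes back 0 the semaphore has already been released.
 */
static long example_pin_remote(struct task_struct *tsk, struct mm_struct *mm,
			       unsigned long start, unsigned long nr_pages,
			       struct page **pages)
{
	int locked = 1;
	long pinned;

	down_read(&mm->mmap_sem);
	pinned = get_user_pages_remote(tsk, mm, start, nr_pages,
				       FOLL_WRITE, pages, NULL, &locked);
	if (locked)
		up_read(&mm->mmap_sem);
	return pinned;
}
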
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203 long populate_vma_page_range(struct vm_area_struct *vma,
1204 unsigned long start, unsigned long end, int *nonblocking)
1205 {
1206 struct mm_struct *mm = vma->vm_mm;
1207 unsigned long nr_pages = (end - start) / PAGE_SIZE;
1208 int gup_flags;
1209
1210 VM_BUG_ON(start & ~PAGE_MASK);
1211 VM_BUG_ON(end & ~PAGE_MASK);
1212 VM_BUG_ON_VMA(start < vma->vm_start, vma);
1213 VM_BUG_ON_VMA(end > vma->vm_end, vma);
1214 VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
1215
1216 gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
1217 if (vma->vm_flags & VM_LOCKONFAULT)
1218 gup_flags &= ~FOLL_POPULATE;
1219
1220
1221
1222
1223
1224 if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
1225 gup_flags |= FOLL_WRITE;
1226
1227
1228
1229
1230
1231 if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
1232 gup_flags |= FOLL_FORCE;
1233
1234
1235
1236
1237
1238 return __get_user_pages(current, mm, start, nr_pages, gup_flags,
1239 NULL, NULL, nonblocking);
1240 }
1241
1242
1243
1244
1245
1246
1247
1248
1249 int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
1250 {
1251 struct mm_struct *mm = current->mm;
1252 unsigned long end, nstart, nend;
1253 struct vm_area_struct *vma = NULL;
1254 int locked = 0;
1255 long ret = 0;
1256
1257 end = start + len;
1258
1259 for (nstart = start; nstart < end; nstart = nend) {
1260
1261
1262
1263
1264 if (!locked) {
1265 locked = 1;
1266 down_read(&mm->mmap_sem);
1267 vma = find_vma(mm, nstart);
1268 } else if (nstart >= vma->vm_end)
1269 vma = vma->vm_next;
1270 if (!vma || vma->vm_start >= end)
1271 break;
1272
1273
1274
1275
1276 nend = min(end, vma->vm_end);
1277 if (vma->vm_flags & (VM_IO | VM_PFNMAP))
1278 continue;
1279 if (nstart < vma->vm_start)
1280 nstart = vma->vm_start;
1281
1282
1283
1284
1285
1286 ret = populate_vma_page_range(vma, nstart, nend, &locked);
1287 if (ret < 0) {
1288 if (ignore_errors) {
1289 ret = 0;
1290 continue;
1291 }
1292 break;
1293 }
1294 nend = nstart + ret * PAGE_SIZE;
1295 ret = 0;
1296 }
1297 if (locked)
1298 up_read(&mm->mmap_sem);
1299 return ret;
1300 }
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316 #ifdef CONFIG_ELF_CORE
1317 struct page *get_dump_page(unsigned long addr)
1318 {
1319 struct vm_area_struct *vma;
1320 struct page *page;
1321
1322 if (__get_user_pages(current, current->mm, addr, 1,
1323 FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
1324 NULL) < 1)
1325 return NULL;
1326 flush_cache_page(vma, addr, page_to_pfn(page));
1327 return page;
1328 }
1329 #endif
1330 #else
1331 static long __get_user_pages_locked(struct task_struct *tsk,
1332 struct mm_struct *mm, unsigned long start,
1333 unsigned long nr_pages, struct page **pages,
1334 struct vm_area_struct **vmas, int *locked,
1335 unsigned int foll_flags)
1336 {
1337 struct vm_area_struct *vma;
1338 unsigned long vm_flags;
1339 int i;
1340
1341
1342
1343
1344 vm_flags = (foll_flags & FOLL_WRITE) ?
1345 (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
1346 vm_flags &= (foll_flags & FOLL_FORCE) ?
1347 (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
1348
1349 for (i = 0; i < nr_pages; i++) {
1350 vma = find_vma(mm, start);
1351 if (!vma)
1352 goto finish_or_fault;
1353
1354
1355 if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
1356 !(vm_flags & vma->vm_flags))
1357 goto finish_or_fault;
1358
1359 if (pages) {
1360 pages[i] = virt_to_page(start);
1361 if (pages[i])
1362 get_page(pages[i]);
1363 }
1364 if (vmas)
1365 vmas[i] = vma;
1366 start = (start + PAGE_SIZE) & PAGE_MASK;
1367 }
1368
1369 return i;
1370
1371 finish_or_fault:
1372 return i ? : -EFAULT;
1373 }
1374 #endif
1375
1376 #if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA)
1377 static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
1378 {
1379 long i;
1380 struct vm_area_struct *vma_prev = NULL;
1381
1382 for (i = 0; i < nr_pages; i++) {
1383 struct vm_area_struct *vma = vmas[i];
1384
1385 if (vma == vma_prev)
1386 continue;
1387
1388 vma_prev = vma;
1389
1390 if (vma_is_fsdax(vma))
1391 return true;
1392 }
1393 return false;
1394 }
1395
1396 #ifdef CONFIG_CMA
1397 static struct page *new_non_cma_page(struct page *page, unsigned long private)
1398 {
1399
1400
1401
1402
1403 int nid = page_to_nid(page);
1404
1405
1406
1407
1408
1409
1410
1411 gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
1412
1413 if (PageHighMem(page))
1414 gfp_mask |= __GFP_HIGHMEM;
1415
1416 #ifdef CONFIG_HUGETLB_PAGE
1417 if (PageHuge(page)) {
1418 struct hstate *h = page_hstate(page);
1419
1420
1421
1422
1423 return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
1424 }
1425 #endif
1426 if (PageTransHuge(page)) {
1427 struct page *thp;
1428
1429
1430
1431 gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
1432
1433
1434
1435
1436
1437 thp_gfpmask &= ~__GFP_MOVABLE;
1438 thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
1439 if (!thp)
1440 return NULL;
1441 prep_transhuge_page(thp);
1442 return thp;
1443 }
1444
1445 return __alloc_pages_node(nid, gfp_mask, 0);
1446 }
1447
1448 static long check_and_migrate_cma_pages(struct task_struct *tsk,
1449 struct mm_struct *mm,
1450 unsigned long start,
1451 unsigned long nr_pages,
1452 struct page **pages,
1453 struct vm_area_struct **vmas,
1454 unsigned int gup_flags)
1455 {
1456 unsigned long i;
1457 unsigned long step;
1458 bool drain_allow = true;
1459 bool migrate_allow = true;
1460 LIST_HEAD(cma_page_list);
1461
1462 check_again:
1463 for (i = 0; i < nr_pages;) {
1464
1465 struct page *head = compound_head(pages[i]);
1466
1467
1468
1469
1470
1471 step = compound_nr(head) - (pages[i] - head);
1472
1473
1474
1475
1476
1477 if (is_migrate_cma_page(head)) {
1478 if (PageHuge(head))
1479 isolate_huge_page(head, &cma_page_list);
1480 else {
1481 if (!PageLRU(head) && drain_allow) {
1482 lru_add_drain_all();
1483 drain_allow = false;
1484 }
1485
1486 if (!isolate_lru_page(head)) {
1487 list_add_tail(&head->lru, &cma_page_list);
1488 mod_node_page_state(page_pgdat(head),
1489 NR_ISOLATED_ANON +
1490 page_is_file_cache(head),
1491 hpage_nr_pages(head));
1492 }
1493 }
1494 }
1495
1496 i += step;
1497 }
1498
1499 if (!list_empty(&cma_page_list)) {
1500
1501
1502
1503 for (i = 0; i < nr_pages; i++)
1504 put_page(pages[i]);
1505
1506 if (migrate_pages(&cma_page_list, new_non_cma_page,
1507 NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
1508
1509
1510
1511
1512 migrate_allow = false;
1513
1514 if (!list_empty(&cma_page_list))
1515 putback_movable_pages(&cma_page_list);
1516 }
1517
1518
1519
1520
1521
1522 nr_pages = __get_user_pages_locked(tsk, mm, start, nr_pages,
1523 pages, vmas, NULL,
1524 gup_flags);
1525
1526 if ((nr_pages > 0) && migrate_allow) {
1527 drain_allow = true;
1528 goto check_again;
1529 }
1530 }
1531
1532 return nr_pages;
1533 }
1534 #else
1535 static long check_and_migrate_cma_pages(struct task_struct *tsk,
1536 struct mm_struct *mm,
1537 unsigned long start,
1538 unsigned long nr_pages,
1539 struct page **pages,
1540 struct vm_area_struct **vmas,
1541 unsigned int gup_flags)
1542 {
1543 return nr_pages;
1544 }
1545 #endif
1546
1547
1548
1549
1550
1551 static long __gup_longterm_locked(struct task_struct *tsk,
1552 struct mm_struct *mm,
1553 unsigned long start,
1554 unsigned long nr_pages,
1555 struct page **pages,
1556 struct vm_area_struct **vmas,
1557 unsigned int gup_flags)
1558 {
1559 struct vm_area_struct **vmas_tmp = vmas;
1560 unsigned long flags = 0;
1561 long rc, i;
1562
1563 if (gup_flags & FOLL_LONGTERM) {
1564 if (!pages)
1565 return -EINVAL;
1566
1567 if (!vmas_tmp) {
1568 vmas_tmp = kcalloc(nr_pages,
1569 sizeof(struct vm_area_struct *),
1570 GFP_KERNEL);
1571 if (!vmas_tmp)
1572 return -ENOMEM;
1573 }
1574 flags = memalloc_nocma_save();
1575 }
1576
1577 rc = __get_user_pages_locked(tsk, mm, start, nr_pages, pages,
1578 vmas_tmp, NULL, gup_flags);
1579
1580 if (gup_flags & FOLL_LONGTERM) {
1581 memalloc_nocma_restore(flags);
1582 if (rc < 0)
1583 goto out;
1584
1585 if (check_dax_vmas(vmas_tmp, rc)) {
1586 for (i = 0; i < rc; i++)
1587 put_page(pages[i]);
1588 rc = -EOPNOTSUPP;
1589 goto out;
1590 }
1591
1592 rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages,
1593 vmas_tmp, gup_flags);
1594 }
1595
1596 out:
1597 if (vmas_tmp != vmas)
1598 kfree(vmas_tmp);
1599 return rc;
1600 }
1601 #else
1602 static __always_inline long __gup_longterm_locked(struct task_struct *tsk,
1603 struct mm_struct *mm,
1604 unsigned long start,
1605 unsigned long nr_pages,
1606 struct page **pages,
1607 struct vm_area_struct **vmas,
1608 unsigned int flags)
1609 {
1610 return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
1611 NULL, flags);
1612 }
1613 #endif
1614
1615
1616
1617
1618
1619
1620
1621
1622 long get_user_pages(unsigned long start, unsigned long nr_pages,
1623 unsigned int gup_flags, struct page **pages,
1624 struct vm_area_struct **vmas)
1625 {
1626 return __gup_longterm_locked(current, current->mm, start, nr_pages,
1627 pages, vmas, gup_flags | FOLL_TOUCH);
1628 }
1629 EXPORT_SYMBOL(get_user_pages);
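
/*
 * Sketch (hypothetical helper, not part of this file) of the plain
 * get_user_pages() convention: the caller holds mmap_sem across the call,
 * then uses the pinned pages and finally releases them, dirtying them if
 * they were written to.
 */
static long example_pin_current_mm(unsigned long start, unsigned long nr_pages,
				   struct page **pages)
{
	long pinned;

	down_read(&current->mm->mmap_sem);
	pinned = get_user_pages(start, nr_pages, FOLL_WRITE, pages, NULL);
	up_read(&current->mm->mmap_sem);
	if (pinned <= 0)
		return pinned;

	/* ... write data into the pinned pages here ... */

	/* Written to, so dirty the pages when dropping the pin. */
	put_user_pages_dirty_lock(pages, pinned, true);
	return pinned;
}
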
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652 long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
1653 unsigned int gup_flags, struct page **pages,
1654 int *locked)
1655 {
1656
1657
1658
1659
1660
1661
1662 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
1663 return -EINVAL;
1664
1665 return __get_user_pages_locked(current, current->mm, start, nr_pages,
1666 pages, NULL, locked,
1667 gup_flags | FOLL_TOUCH);
1668 }
1669 EXPORT_SYMBOL(get_user_pages_locked);
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
1687 struct page **pages, unsigned int gup_flags)
1688 {
1689 struct mm_struct *mm = current->mm;
1690 int locked = 1;
1691 long ret;
1692
1693
1694
1695
1696
1697
1698
1699 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
1700 return -EINVAL;
1701
1702 down_read(&mm->mmap_sem);
1703 ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL,
1704 &locked, gup_flags | FOLL_TOUCH);
1705 if (locked)
1706 up_read(&mm->mmap_sem);
1707 return ret;
1708 }
1709 EXPORT_SYMBOL(get_user_pages_unlocked);
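
/*
 * For completeness, a sketch (hypothetical helper, not part of this file) of
 * the unlocked variant, which takes and drops mmap_sem internally; note that
 * its argument order differs from the other variants (pages comes before
 * gup_flags).
 */
static long example_pin_unlocked(unsigned long start, unsigned long nr_pages,
				 struct page **pages)
{
	/* No mm locks held by the caller; the helper manages mmap_sem. */
	return get_user_pages_unlocked(start, nr_pages, pages, FOLL_WRITE);
}
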
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744 #ifdef CONFIG_HAVE_FAST_GUP
1745 #ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777 static inline pte_t gup_get_pte(pte_t *ptep)
1778 {
1779 pte_t pte;
1780
1781 do {
1782 pte.pte_low = ptep->pte_low;
1783 smp_rmb();
1784 pte.pte_high = ptep->pte_high;
1785 smp_rmb();
1786 } while (unlikely(pte.pte_low != ptep->pte_low));
1787
1788 return pte;
1789 }
1790 #else
1791
1792
1793
1794 static inline pte_t gup_get_pte(pte_t *ptep)
1795 {
1796 return READ_ONCE(*ptep);
1797 }
1798 #endif
1799
1800 static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
1801 struct page **pages)
1802 {
1803 while ((*nr) - nr_start) {
1804 struct page *page = pages[--(*nr)];
1805
1806 ClearPageReferenced(page);
1807 put_page(page);
1808 }
1809 }
1810
1811
1812
1813
1814
1815 static inline struct page *try_get_compound_head(struct page *page, int refs)
1816 {
1817 struct page *head = compound_head(page);
1818 if (WARN_ON_ONCE(page_ref_count(head) < 0))
1819 return NULL;
1820 if (unlikely(!page_cache_add_speculative(head, refs)))
1821 return NULL;
1822 return head;
1823 }
1824
1825 #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
1826 static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
1827 unsigned int flags, struct page **pages, int *nr)
1828 {
1829 struct dev_pagemap *pgmap = NULL;
1830 int nr_start = *nr, ret = 0;
1831 pte_t *ptep, *ptem;
1832
1833 ptem = ptep = pte_offset_map(&pmd, addr);
1834 do {
1835 pte_t pte = gup_get_pte(ptep);
1836 struct page *head, *page;
1837
1838
1839
1840
1841
1842 if (pte_protnone(pte))
1843 goto pte_unmap;
1844
1845 if (!pte_access_permitted(pte, flags & FOLL_WRITE))
1846 goto pte_unmap;
1847
1848 if (pte_devmap(pte)) {
1849 if (unlikely(flags & FOLL_LONGTERM))
1850 goto pte_unmap;
1851
1852 pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
1853 if (unlikely(!pgmap)) {
1854 undo_dev_pagemap(nr, nr_start, pages);
1855 goto pte_unmap;
1856 }
1857 } else if (pte_special(pte))
1858 goto pte_unmap;
1859
1860 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
1861 page = pte_page(pte);
1862
1863 head = try_get_compound_head(page, 1);
1864 if (!head)
1865 goto pte_unmap;
1866
1867 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
1868 put_page(head);
1869 goto pte_unmap;
1870 }
1871
1872 VM_BUG_ON_PAGE(compound_head(page) != head, page);
1873
1874 SetPageReferenced(page);
1875 pages[*nr] = page;
1876 (*nr)++;
1877
1878 } while (ptep++, addr += PAGE_SIZE, addr != end);
1879
1880 ret = 1;
1881
1882 pte_unmap:
1883 if (pgmap)
1884 put_dev_pagemap(pgmap);
1885 pte_unmap(ptem);
1886 return ret;
1887 }
1888 #else
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899 static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
1900 unsigned int flags, struct page **pages, int *nr)
1901 {
1902 return 0;
1903 }
1904 #endif
1905
1906 #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
1907 static int __gup_device_huge(unsigned long pfn, unsigned long addr,
1908 unsigned long end, struct page **pages, int *nr)
1909 {
1910 int nr_start = *nr;
1911 struct dev_pagemap *pgmap = NULL;
1912
1913 do {
1914 struct page *page = pfn_to_page(pfn);
1915
1916 pgmap = get_dev_pagemap(pfn, pgmap);
1917 if (unlikely(!pgmap)) {
1918 undo_dev_pagemap(nr, nr_start, pages);
1919 return 0;
1920 }
1921 SetPageReferenced(page);
1922 pages[*nr] = page;
1923 get_page(page);
1924 (*nr)++;
1925 pfn++;
1926 } while (addr += PAGE_SIZE, addr != end);
1927
1928 if (pgmap)
1929 put_dev_pagemap(pgmap);
1930 return 1;
1931 }
1932
1933 static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
1934 unsigned long end, struct page **pages, int *nr)
1935 {
1936 unsigned long fault_pfn;
1937 int nr_start = *nr;
1938
1939 fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
1940 if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
1941 return 0;
1942
1943 if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
1944 undo_dev_pagemap(nr, nr_start, pages);
1945 return 0;
1946 }
1947 return 1;
1948 }
1949
1950 static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
1951 unsigned long end, struct page **pages, int *nr)
1952 {
1953 unsigned long fault_pfn;
1954 int nr_start = *nr;
1955
1956 fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
1957 if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
1958 return 0;
1959
1960 if (unlikely(pud_val(orig) != pud_val(*pudp))) {
1961 undo_dev_pagemap(nr, nr_start, pages);
1962 return 0;
1963 }
1964 return 1;
1965 }
1966 #else
1967 static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
1968 unsigned long end, struct page **pages, int *nr)
1969 {
1970 BUILD_BUG();
1971 return 0;
1972 }
1973
1974 static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
1975 unsigned long end, struct page **pages, int *nr)
1976 {
1977 BUILD_BUG();
1978 return 0;
1979 }
1980 #endif
1981
1982 #ifdef CONFIG_ARCH_HAS_HUGEPD
1983 static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
1984 unsigned long sz)
1985 {
1986 unsigned long __boundary = (addr + sz) & ~(sz-1);
1987 return (__boundary - 1 < end - 1) ? __boundary : end;
1988 }
1989
1990 static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
1991 unsigned long end, unsigned int flags,
1992 struct page **pages, int *nr)
1993 {
1994 unsigned long pte_end;
1995 struct page *head, *page;
1996 pte_t pte;
1997 int refs;
1998
1999 pte_end = (addr + sz) & ~(sz-1);
2000 if (pte_end < end)
2001 end = pte_end;
2002
2003 pte = READ_ONCE(*ptep);
2004
2005 if (!pte_access_permitted(pte, flags & FOLL_WRITE))
2006 return 0;
2007
2008
2009 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
2010
2011 refs = 0;
2012 head = pte_page(pte);
2013
2014 page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
2015 do {
2016 VM_BUG_ON(compound_head(page) != head);
2017 pages[*nr] = page;
2018 (*nr)++;
2019 page++;
2020 refs++;
2021 } while (addr += PAGE_SIZE, addr != end);
2022
2023 head = try_get_compound_head(head, refs);
2024 if (!head) {
2025 *nr -= refs;
2026 return 0;
2027 }
2028
2029 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
2030
2031 *nr -= refs;
2032 while (refs--)
2033 put_page(head);
2034 return 0;
2035 }
2036
2037 SetPageReferenced(head);
2038 return 1;
2039 }
2040
2041 static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
2042 unsigned int pdshift, unsigned long end, unsigned int flags,
2043 struct page **pages, int *nr)
2044 {
2045 pte_t *ptep;
2046 unsigned long sz = 1UL << hugepd_shift(hugepd);
2047 unsigned long next;
2048
2049 ptep = hugepte_offset(hugepd, addr, pdshift);
2050 do {
2051 next = hugepte_addr_end(addr, end, sz);
2052 if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr))
2053 return 0;
2054 } while (ptep++, addr = next, addr != end);
2055
2056 return 1;
2057 }
2058 #else
2059 static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
2060 unsigned int pdshift, unsigned long end, unsigned int flags,
2061 struct page **pages, int *nr)
2062 {
2063 return 0;
2064 }
2065 #endif
2066
2067 static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
2068 unsigned long end, unsigned int flags,
2069 struct page **pages, int *nr)
2070 {
2071 struct page *head, *page;
2072 int refs;
2073
2074 if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
2075 return 0;
2076
2077 if (pmd_devmap(orig)) {
2078 if (unlikely(flags & FOLL_LONGTERM))
2079 return 0;
2080 return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr);
2081 }
2082
2083 refs = 0;
2084 page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
2085 do {
2086 pages[*nr] = page;
2087 (*nr)++;
2088 page++;
2089 refs++;
2090 } while (addr += PAGE_SIZE, addr != end);
2091
2092 head = try_get_compound_head(pmd_page(orig), refs);
2093 if (!head) {
2094 *nr -= refs;
2095 return 0;
2096 }
2097
2098 if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
2099 *nr -= refs;
2100 while (refs--)
2101 put_page(head);
2102 return 0;
2103 }
2104
2105 SetPageReferenced(head);
2106 return 1;
2107 }
2108
2109 static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
2110 unsigned long end, unsigned int flags, struct page **pages, int *nr)
2111 {
2112 struct page *head, *page;
2113 int refs;
2114
2115 if (!pud_access_permitted(orig, flags & FOLL_WRITE))
2116 return 0;
2117
2118 if (pud_devmap(orig)) {
2119 if (unlikely(flags & FOLL_LONGTERM))
2120 return 0;
2121 return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr);
2122 }
2123
2124 refs = 0;
2125 page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
2126 do {
2127 pages[*nr] = page;
2128 (*nr)++;
2129 page++;
2130 refs++;
2131 } while (addr += PAGE_SIZE, addr != end);
2132
2133 head = try_get_compound_head(pud_page(orig), refs);
2134 if (!head) {
2135 *nr -= refs;
2136 return 0;
2137 }
2138
2139 if (unlikely(pud_val(orig) != pud_val(*pudp))) {
2140 *nr -= refs;
2141 while (refs--)
2142 put_page(head);
2143 return 0;
2144 }
2145
2146 SetPageReferenced(head);
2147 return 1;
2148 }
2149
2150 static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
2151 unsigned long end, unsigned int flags,
2152 struct page **pages, int *nr)
2153 {
2154 int refs;
2155 struct page *head, *page;
2156
2157 if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
2158 return 0;
2159
2160 BUILD_BUG_ON(pgd_devmap(orig));
2161 refs = 0;
2162 page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
2163 do {
2164 pages[*nr] = page;
2165 (*nr)++;
2166 page++;
2167 refs++;
2168 } while (addr += PAGE_SIZE, addr != end);
2169
2170 head = try_get_compound_head(pgd_page(orig), refs);
2171 if (!head) {
2172 *nr -= refs;
2173 return 0;
2174 }
2175
2176 if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
2177 *nr -= refs;
2178 while (refs--)
2179 put_page(head);
2180 return 0;
2181 }
2182
2183 SetPageReferenced(head);
2184 return 1;
2185 }
2186
2187 static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
2188 unsigned int flags, struct page **pages, int *nr)
2189 {
2190 unsigned long next;
2191 pmd_t *pmdp;
2192
2193 pmdp = pmd_offset(&pud, addr);
2194 do {
2195 pmd_t pmd = READ_ONCE(*pmdp);
2196
2197 next = pmd_addr_end(addr, end);
2198 if (!pmd_present(pmd))
2199 return 0;
2200
2201 if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
2202 pmd_devmap(pmd))) {
2203
2204
2205
2206
2207
2208 if (pmd_protnone(pmd))
2209 return 0;
2210
2211 if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
2212 pages, nr))
2213 return 0;
2214
2215 } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
2216
2217
2218
2219
2220 if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
2221 PMD_SHIFT, next, flags, pages, nr))
2222 return 0;
2223 } else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
2224 return 0;
2225 } while (pmdp++, addr = next, addr != end);
2226
2227 return 1;
2228 }
2229
2230 static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
2231 unsigned int flags, struct page **pages, int *nr)
2232 {
2233 unsigned long next;
2234 pud_t *pudp;
2235
2236 pudp = pud_offset(&p4d, addr);
2237 do {
2238 pud_t pud = READ_ONCE(*pudp);
2239
2240 next = pud_addr_end(addr, end);
2241 if (pud_none(pud))
2242 return 0;
2243 if (unlikely(pud_huge(pud))) {
2244 if (!gup_huge_pud(pud, pudp, addr, next, flags,
2245 pages, nr))
2246 return 0;
2247 } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
2248 if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
2249 PUD_SHIFT, next, flags, pages, nr))
2250 return 0;
2251 } else if (!gup_pmd_range(pud, addr, next, flags, pages, nr))
2252 return 0;
2253 } while (pudp++, addr = next, addr != end);
2254
2255 return 1;
2256 }
2257
2258 static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
2259 unsigned int flags, struct page **pages, int *nr)
2260 {
2261 unsigned long next;
2262 p4d_t *p4dp;
2263
2264 p4dp = p4d_offset(&pgd, addr);
2265 do {
2266 p4d_t p4d = READ_ONCE(*p4dp);
2267
2268 next = p4d_addr_end(addr, end);
2269 if (p4d_none(p4d))
2270 return 0;
2271 BUILD_BUG_ON(p4d_huge(p4d));
2272 if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
2273 if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
2274 P4D_SHIFT, next, flags, pages, nr))
2275 return 0;
2276 } else if (!gup_pud_range(p4d, addr, next, flags, pages, nr))
2277 return 0;
2278 } while (p4dp++, addr = next, addr != end);
2279
2280 return 1;
2281 }
2282
2283 static void gup_pgd_range(unsigned long addr, unsigned long end,
2284 unsigned int flags, struct page **pages, int *nr)
2285 {
2286 unsigned long next;
2287 pgd_t *pgdp;
2288
2289 pgdp = pgd_offset(current->mm, addr);
2290 do {
2291 pgd_t pgd = READ_ONCE(*pgdp);
2292
2293 next = pgd_addr_end(addr, end);
2294 if (pgd_none(pgd))
2295 return;
2296 if (unlikely(pgd_huge(pgd))) {
2297 if (!gup_huge_pgd(pgd, pgdp, addr, next, flags,
2298 pages, nr))
2299 return;
2300 } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
2301 if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
2302 PGDIR_SHIFT, next, flags, pages, nr))
2303 return;
2304 } else if (!gup_p4d_range(pgd, addr, next, flags, pages, nr))
2305 return;
2306 } while (pgdp++, addr = next, addr != end);
2307 }
2308 #else
2309 static inline void gup_pgd_range(unsigned long addr, unsigned long end,
2310 unsigned int flags, struct page **pages, int *nr)
2311 {
2312 }
2313 #endif
2314
2315 #ifndef gup_fast_permitted
2316
2317
2318
2319
2320 static bool gup_fast_permitted(unsigned long start, unsigned long end)
2321 {
2322 return true;
2323 }
2324 #endif
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
2340 struct page **pages)
2341 {
2342 unsigned long len, end;
2343 unsigned long flags;
2344 int nr = 0;
2345
2346 start = untagged_addr(start) & PAGE_MASK;
2347 len = (unsigned long) nr_pages << PAGE_SHIFT;
2348 end = start + len;
2349
2350 if (end <= start)
2351 return 0;
2352 if (unlikely(!access_ok((void __user *)start, len)))
2353 return 0;
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373 if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
2374 gup_fast_permitted(start, end)) {
2375 local_irq_save(flags);
2376 gup_pgd_range(start, end, write ? FOLL_WRITE : 0, pages, &nr);
2377 local_irq_restore(flags);
2378 }
2379
2380 return nr;
2381 }
2382 EXPORT_SYMBOL_GPL(__get_user_pages_fast);
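
/*
 * Sketch (hypothetical helper, not part of this file) of opportunistic use:
 * __get_user_pages_fast() only walks the page tables with interrupts
 * disabled and never faults or sleeps, so callers that may run in atomic
 * context try it first and fall back to a sleeping variant if it pins fewer
 * pages than requested.
 */
static int example_try_pin_one_atomic(unsigned long uaddr, struct page **page)
{
	/* write == 1; returns 1 on success, 0 if the page must be faulted in. */
	return __get_user_pages_fast(uaddr, 1, 1, page);
}
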
2383
2384 static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
2385 unsigned int gup_flags, struct page **pages)
2386 {
2387 int ret;
2388
2389
2390
2391
2392
2393 if (gup_flags & FOLL_LONGTERM) {
2394 down_read(&current->mm->mmap_sem);
2395 ret = __gup_longterm_locked(current, current->mm,
2396 start, nr_pages,
2397 pages, NULL, gup_flags);
2398 up_read(&current->mm->mmap_sem);
2399 } else {
2400 ret = get_user_pages_unlocked(start, nr_pages,
2401 pages, gup_flags);
2402 }
2403
2404 return ret;
2405 }
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423 int get_user_pages_fast(unsigned long start, int nr_pages,
2424 unsigned int gup_flags, struct page **pages)
2425 {
2426 unsigned long addr, len, end;
2427 int nr = 0, ret = 0;
2428
2429 if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
2430 FOLL_FORCE)))
2431 return -EINVAL;
2432
2433 start = untagged_addr(start) & PAGE_MASK;
2434 addr = start;
2435 len = (unsigned long) nr_pages << PAGE_SHIFT;
2436 end = start + len;
2437
2438 if (end <= start)
2439 return 0;
2440 if (unlikely(!access_ok((void __user *)start, len)))
2441 return -EFAULT;
2442
2443
2444
2445
2446
2447
2448
2449
2450 if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
2451 gup_fast_permitted(start, end)) {
2452 local_irq_disable();
2453 gup_pgd_range(addr, end, gup_flags | FOLL_WRITE, pages, &nr);
2454 local_irq_enable();
2455 ret = nr;
2456 }
2457
2458 if (nr < nr_pages) {
2459
2460 start += nr << PAGE_SHIFT;
2461 pages += nr;
2462
2463 ret = __gup_longterm_unlocked(start, nr_pages - nr,
2464 gup_flags, pages);
2465
2466
2467 if (nr > 0) {
2468 if (ret < 0)
2469 ret = nr;
2470 else
2471 ret += nr;
2472 }
2473 }
2474
2475 return ret;
2476 }
2477 EXPORT_SYMBOL_GPL(get_user_pages_fast);
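
/*
 * Finally, a sketch (hypothetical helper, not part of this file) of a
 * long-lived pin such as an RDMA memory registration: FOLL_LONGTERM sends
 * the slow-path fallback through __gup_longterm_locked() above, which
 * rejects FS DAX mappings and migrates CMA pages away before the pin is
 * returned to the caller.
 */
static int example_longterm_pin(unsigned long uaddr, int npages,
				struct page **pages)
{
	return get_user_pages_fast(uaddr, npages,
				   FOLL_WRITE | FOLL_LONGTERM, pages);
}
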