This source file includes the following definitions; a short, hedged usage sketch of the mirror/range API follows the list.
- hmm_alloc_notifier
- hmm_free_notifier
- hmm_release
- notifiers_decrement
- hmm_invalidate_range_start
- hmm_invalidate_range_end
- hmm_mirror_register
- hmm_mirror_unregister
- hmm_vma_do_fault
- hmm_pfns_bad
- hmm_vma_walk_hole_
- hmm_pte_need_fault
- hmm_range_need_fault
- hmm_vma_walk_hole
- pmd_to_hmm_pfn_flags
- hmm_vma_handle_pmd
- pte_to_hmm_pfn_flags
- hmm_vma_handle_pte
- hmm_vma_walk_pmd
- pud_to_hmm_pfn_flags
- hmm_vma_walk_pud
- hmm_vma_walk_hugetlb_entry
- hmm_pfns_clear
- hmm_range_register
- hmm_range_unregister
- hmm_range_fault
- hmm_range_dma_map
- hmm_range_dma_unmap
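
Before the source proper, a hedged sketch of how a device driver might drive this API. It is not part of mm/hmm.c: everything prefixed my_ is a hypothetical driver-side name, the hmm_* calls and struct hmm_range fields are the ones implemented below, and hmm_range_wait_until_valid(), HMM_RANGE_DEFAULT_TIMEOUT, HMM_PFN_FLAG_MAX and HMM_PFN_VALUE_MAX are assumed to be provided by include/linux/hmm.h for this kernel version. Fields not shown (such as pfn_shift) are left at their zero defaults and error handling is abbreviated.

#include <linux/hmm.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>

/* Driver-chosen bit encodings for the HMM pfn flags and special values. */
static const uint64_t my_flags[HMM_PFN_FLAG_MAX] = {
	[HMM_PFN_VALID]          = 1ULL << 63,
	[HMM_PFN_WRITE]          = 1ULL << 62,
	[HMM_PFN_DEVICE_PRIVATE] = 1ULL << 61,
};

static const uint64_t my_values[HMM_PFN_VALUE_MAX] = {
	[HMM_PFN_ERROR]   = 0x3ULL << 59,
	[HMM_PFN_NONE]    = 0,
	[HMM_PFN_SPECIAL] = 0x1ULL << 59,
};

/* Invalidation callback: the device must stop using the covered range. */
static int my_sync(struct hmm_mirror *mirror,
		   const struct mmu_notifier_range *nrange)
{
	/* e.g. zap the device page table entries covering the range */
	return 0;
}

/* The whole address space is going away. */
static void my_release(struct hmm_mirror *mirror)
{
}

static const struct hmm_mirror_ops my_mirror_ops = {
	.sync_cpu_device_pagetables = my_sync,
	.release = my_release,
};

/* One-time setup: caller holds a mmget() reference and mmap_sem for write. */
static int my_mirror_init(struct hmm_mirror *mirror, struct mm_struct *mm)
{
	mirror->ops = &my_mirror_ops;
	return hmm_mirror_register(mirror, mm); /* paired with hmm_mirror_unregister() */
}

/* Fault npages starting at start and snapshot them into pfns[]. */
static long my_fault_range(struct hmm_mirror *mirror, struct mm_struct *mm,
			   unsigned long start, unsigned long npages,
			   uint64_t *pfns)
{
	struct hmm_range range = {
		.start          = start,
		.end            = start + (npages << PAGE_SHIFT),
		.pfns           = pfns,
		.flags          = my_flags,
		.values         = my_values,
		.default_flags  = my_flags[HMM_PFN_VALID], /* pre-fault readable */
		.pfn_flags_mask = 0,
	};
	long ret;

	ret = hmm_range_register(&range, mirror);
	if (ret)
		return ret;
again:
	/* Wait for any concurrent CPU page table invalidation to finish. */
	if (!hmm_range_wait_until_valid(&range, HMM_RANGE_DEFAULT_TIMEOUT)) {
		ret = -EBUSY;
		goto out;
	}

	down_read(&mm->mmap_sem);
	ret = hmm_range_fault(&range, 0);
	up_read(&mm->mmap_sem);
	if (ret == -EBUSY)
		goto again; /* the range was invalidated, retry */

	/* On success pfns[] now encodes each page using my_flags/my_values. */
out:
	hmm_range_unregister(&range);
	return ret; /* > 0: number of valid pfns */
}

Setting default_flags with a zero pfn_flags_mask pre-faults the whole range under one policy; the alternative mode is to leave default_flags clear and put per-page request flags into pfns[] before calling hmm_range_fault() (see hmm_pte_need_fault() below).
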
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Heterogeneous Memory Management (HMM).
4 *
5 * Refer to include/linux/hmm.h and Documentation/vm/hmm.rst for information
6 * about heterogeneous memory management, or HMM for short: helpers to mirror
7 * a process address space onto a device and to fault or snapshot CPU page
8 * tables on behalf of that device.
9 */
10
11 #include <linux/pagewalk.h>
12 #include <linux/hmm.h>
13 #include <linux/init.h>
14 #include <linux/rmap.h>
15 #include <linux/swap.h>
16 #include <linux/slab.h>
17 #include <linux/sched.h>
18 #include <linux/mmzone.h>
19 #include <linux/pagemap.h>
20 #include <linux/swapops.h>
21 #include <linux/hugetlb.h>
22 #include <linux/memremap.h>
23 #include <linux/sched/mm.h>
24 #include <linux/jump_label.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/mmu_notifier.h>
27 #include <linux/memory_hotplug.h>
28
29 static struct mmu_notifier *hmm_alloc_notifier(struct mm_struct *mm)
30 {
31 struct hmm *hmm;
32
33 hmm = kzalloc(sizeof(*hmm), GFP_KERNEL);
34 if (!hmm)
35 return ERR_PTR(-ENOMEM);
36
37 init_waitqueue_head(&hmm->wq);
38 INIT_LIST_HEAD(&hmm->mirrors);
39 init_rwsem(&hmm->mirrors_sem);
40 INIT_LIST_HEAD(&hmm->ranges);
41 spin_lock_init(&hmm->ranges_lock);
42 hmm->notifiers = 0;
43 return &hmm->mmu_notifier;
44 }
45
46 static void hmm_free_notifier(struct mmu_notifier *mn)
47 {
48 struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
49
50 WARN_ON(!list_empty(&hmm->ranges));
51 WARN_ON(!list_empty(&hmm->mirrors));
52 kfree(hmm);
53 }
54
55 static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
56 {
57 struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
58 struct hmm_mirror *mirror;
59
60 /*
61 * Since hmm_range_register() holds an mmget() reference, hmm_release()
62 * cannot run as long as a range is registered.
63 */
64 WARN_ON(!list_empty_careful(&hmm->ranges));
65
66 down_read(&hmm->mirrors_sem);
67 list_for_each_entry(mirror, &hmm->mirrors, list) {
68 /*
69 * Note: the driver is not allowed to trigger
70 * hmm_mirror_unregister() from this thread.
71 */
72 if (mirror->ops->release)
73 mirror->ops->release(mirror);
74 }
75 up_read(&hmm->mirrors_sem);
76 }
77
78 static void notifiers_decrement(struct hmm *hmm)
79 {
80 unsigned long flags;
81
82 spin_lock_irqsave(&hmm->ranges_lock, flags);
83 hmm->notifiers--;
84 if (!hmm->notifiers) {
85 struct hmm_range *range;
86
87 list_for_each_entry(range, &hmm->ranges, list) {
88 if (range->valid)
89 continue;
90 range->valid = true;
91 }
92 wake_up_all(&hmm->wq);
93 }
94 spin_unlock_irqrestore(&hmm->ranges_lock, flags);
95 }
96
97 static int hmm_invalidate_range_start(struct mmu_notifier *mn,
98 const struct mmu_notifier_range *nrange)
99 {
100 struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
101 struct hmm_mirror *mirror;
102 struct hmm_range *range;
103 unsigned long flags;
104 int ret = 0;
105
106 spin_lock_irqsave(&hmm->ranges_lock, flags);
107 hmm->notifiers++;
108 list_for_each_entry(range, &hmm->ranges, list) {
109 if (nrange->end < range->start || nrange->start >= range->end)
110 continue;
111
112 range->valid = false;
113 }
114 spin_unlock_irqrestore(&hmm->ranges_lock, flags);
115
116 if (mmu_notifier_range_blockable(nrange))
117 down_read(&hmm->mirrors_sem);
118 else if (!down_read_trylock(&hmm->mirrors_sem)) {
119 ret = -EAGAIN;
120 goto out;
121 }
122
123 list_for_each_entry(mirror, &hmm->mirrors, list) {
124 int rc;
125
126 rc = mirror->ops->sync_cpu_device_pagetables(mirror, nrange);
127 if (rc) {
128 if (WARN_ON(mmu_notifier_range_blockable(nrange) ||
129 rc != -EAGAIN))
130 continue;
131 ret = -EAGAIN;
132 break;
133 }
134 }
135 up_read(&hmm->mirrors_sem);
136
137 out:
138 if (ret)
139 notifiers_decrement(hmm);
140 return ret;
141 }
142
143 static void hmm_invalidate_range_end(struct mmu_notifier *mn,
144 const struct mmu_notifier_range *nrange)
145 {
146 struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
147
148 notifiers_decrement(hmm);
149 }
150
151 static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
152 .release = hmm_release,
153 .invalidate_range_start = hmm_invalidate_range_start,
154 .invalidate_range_end = hmm_invalidate_range_end,
155 .alloc_notifier = hmm_alloc_notifier,
156 .free_notifier = hmm_free_notifier,
157 };
158
159 /*
160 * hmm_mirror_register() - register a mirror against an mm
161 * @mirror: new mirror struct to register
162 * @mm: mm to register against. The caller must hold a mmget() reference and
163 * the mmap_sem in write mode.
164 *
165 * To start mirroring a process address space, the device driver must register
166 * an HMM mirror struct.
167 *
168 * Return: 0 on success, -EINVAL if the arguments are invalid, or the error
169 * from mmu_notifier_get_locked().
170 *
171 * The caller cannot unregister the hmm_mirror while any ranges are
172 * registered. Callers using this function must put a call to
173 * mmu_notifier_synchronize() in their module exit functions.
174 */
175 int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
176 {
177 struct mmu_notifier *mn;
178
179 lockdep_assert_held_write(&mm->mmap_sem);
180
181 /* Sanity check */
182 if (!mm || !mirror || !mirror->ops)
183 return -EINVAL;
184
185 mn = mmu_notifier_get_locked(&hmm_mmu_notifier_ops, mm);
186 if (IS_ERR(mn))
187 return PTR_ERR(mn);
188 mirror->hmm = container_of(mn, struct hmm, mmu_notifier);
189
190 down_write(&mirror->hmm->mirrors_sem);
191 list_add(&mirror->list, &mirror->hmm->mirrors);
192 up_write(&mirror->hmm->mirrors_sem);
193
194 return 0;
195 }
196 EXPORT_SYMBOL(hmm_mirror_register);
197
198 /*
199 * hmm_mirror_unregister() - unregister a mirror
200 * @mirror: mirror struct to unregister
201 *
202 * Stop mirroring a process address space and clean up. The mirror must have
203 * been registered with hmm_mirror_register() and have no ranges left.
204 */
205 void hmm_mirror_unregister(struct hmm_mirror *mirror)
206 {
207 struct hmm *hmm = mirror->hmm;
208
209 down_write(&hmm->mirrors_sem);
210 list_del(&mirror->list);
211 up_write(&hmm->mirrors_sem);
212 mmu_notifier_put(&hmm->mmu_notifier);
213 }
214 EXPORT_SYMBOL(hmm_mirror_unregister);
215
216 struct hmm_vma_walk {
217 struct hmm_range *range;
218 struct dev_pagemap *pgmap;
219 unsigned long last;
220 unsigned int flags;
221 };
222
223 static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
224 bool write_fault, uint64_t *pfn)
225 {
226 unsigned int flags = FAULT_FLAG_REMOTE;
227 struct hmm_vma_walk *hmm_vma_walk = walk->private;
228 struct hmm_range *range = hmm_vma_walk->range;
229 struct vm_area_struct *vma = walk->vma;
230 vm_fault_t ret;
231
232 if (!vma)
233 goto err;
234
235 if (hmm_vma_walk->flags & HMM_FAULT_ALLOW_RETRY)
236 flags |= FAULT_FLAG_ALLOW_RETRY;
237 if (write_fault)
238 flags |= FAULT_FLAG_WRITE;
239
240 ret = handle_mm_fault(vma, addr, flags);
241 if (ret & VM_FAULT_RETRY) {
242 /* handle_mm_fault() released mmap_sem when returning VM_FAULT_RETRY */
243 return -EAGAIN;
244 }
245 if (ret & VM_FAULT_ERROR)
246 goto err;
247
248 return -EBUSY;
249
250 err:
251 *pfn = range->values[HMM_PFN_ERROR];
252 return -EFAULT;
253 }
254
255 static int hmm_pfns_bad(unsigned long addr,
256 unsigned long end,
257 struct mm_walk *walk)
258 {
259 struct hmm_vma_walk *hmm_vma_walk = walk->private;
260 struct hmm_range *range = hmm_vma_walk->range;
261 uint64_t *pfns = range->pfns;
262 unsigned long i;
263
264 i = (addr - range->start) >> PAGE_SHIFT;
265 for (; addr < end; addr += PAGE_SIZE, i++)
266 pfns[i] = range->values[HMM_PFN_ERROR];
267
268 return 0;
269 }
270
271 /*
272 * hmm_vma_walk_hole_() - handle a range lacking valid pmd or pte(s)
273 * @addr: range virtual start address (inclusive)
274 * @end: range virtual end address (exclusive)
275 * @fault: should we fault or not ?
276 * @write_fault: write fault ?
277 * @walk: mm_walk structure
278 * Return: 0 on success, -EBUSY after page fault, or page fault error
279 *
280 * This function will be called whenever pmd_none() or pte_none() returns true,
281 * or whenever there is no page directory covering the virtual address range.
282 */
283 static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end,
284 bool fault, bool write_fault,
285 struct mm_walk *walk)
286 {
287 struct hmm_vma_walk *hmm_vma_walk = walk->private;
288 struct hmm_range *range = hmm_vma_walk->range;
289 uint64_t *pfns = range->pfns;
290 unsigned long i;
291
292 hmm_vma_walk->last = addr;
293 i = (addr - range->start) >> PAGE_SHIFT;
294
295 if (write_fault && walk->vma && !(walk->vma->vm_flags & VM_WRITE))
296 return -EPERM;
297
298 for (; addr < end; addr += PAGE_SIZE, i++) {
299 pfns[i] = range->values[HMM_PFN_NONE];
300 if (fault || write_fault) {
301 int ret;
302
303 ret = hmm_vma_do_fault(walk, addr, write_fault,
304 &pfns[i]);
305 if (ret != -EBUSY)
306 return ret;
307 }
308 }
309
310 return (fault || write_fault) ? -EBUSY : 0;
311 }
312
313 static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
314 uint64_t pfns, uint64_t cpu_flags,
315 bool *fault, bool *write_fault)
316 {
317 struct hmm_range *range = hmm_vma_walk->range;
318
319 if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT)
320 return;
321
322 /*
323 * Consider the default flags requested for the range, in addition to the
324 * individual per-page request flags. The API can be used in two ways:
325 * either the HMM user coalesces multiple page faults into one request and
326 * sets the desired flags per pfn in the pfns array, or it wants to
327 * pre-fault a whole range with specific flags and only sets default_flags
328 * and pfn_flags_mask. For the latter it would be wasteful to make the
329 * user pre-fill the pfn array with a default flags value, so both
330 * sources are combined below.
331 */
332 pfns = (pfns & range->pfn_flags_mask) | range->default_flags;
333
334 /* We aren't asked to do anything ... */
335 if (!(pfns & range->flags[HMM_PFN_VALID]))
336 return;
337
338 if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) {
339 /* Do we fault on device memory ? */
340 if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) {
341 *write_fault = pfns & range->flags[HMM_PFN_WRITE];
342 *fault = true;
343 }
344 return;
345 }
346
347 /* If the CPU page table is not valid then we need to fault */
348 *fault = !(cpu_flags & range->flags[HMM_PFN_VALID]);
349 /* Do we need a write fault ? */
350 if ((pfns & range->flags[HMM_PFN_WRITE]) &&
351 !(cpu_flags & range->flags[HMM_PFN_WRITE])) {
352 *write_fault = true;
353 *fault = true;
354 }
355 }
356
357 static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
358 const uint64_t *pfns, unsigned long npages,
359 uint64_t cpu_flags, bool *fault,
360 bool *write_fault)
361 {
362 unsigned long i;
363
364 if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT) {
365 *fault = *write_fault = false;
366 return;
367 }
368
369 *fault = *write_fault = false;
370 for (i = 0; i < npages; ++i) {
371 hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags,
372 fault, write_fault);
373 if ((*write_fault))
374 return;
375 }
376 }
377
378 static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
379 struct mm_walk *walk)
380 {
381 struct hmm_vma_walk *hmm_vma_walk = walk->private;
382 struct hmm_range *range = hmm_vma_walk->range;
383 bool fault, write_fault;
384 unsigned long i, npages;
385 uint64_t *pfns;
386
387 i = (addr - range->start) >> PAGE_SHIFT;
388 npages = (end - addr) >> PAGE_SHIFT;
389 pfns = &range->pfns[i];
390 hmm_range_need_fault(hmm_vma_walk, pfns, npages,
391 0, &fault, &write_fault);
392 return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
393 }
394
395 static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
396 {
397 if (pmd_protnone(pmd))
398 return 0;
399 return pmd_write(pmd) ? range->flags[HMM_PFN_VALID] |
400 range->flags[HMM_PFN_WRITE] :
401 range->flags[HMM_PFN_VALID];
402 }
403
404 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
405 static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
406 unsigned long end, uint64_t *pfns, pmd_t pmd)
407 {
408 struct hmm_vma_walk *hmm_vma_walk = walk->private;
409 struct hmm_range *range = hmm_vma_walk->range;
410 unsigned long pfn, npages, i;
411 bool fault, write_fault;
412 uint64_t cpu_flags;
413
414 npages = (end - addr) >> PAGE_SHIFT;
415 cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
416 hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags,
417 &fault, &write_fault);
418
419 if (pmd_protnone(pmd) || fault || write_fault)
420 return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
421
422 pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
423 for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) {
424 if (pmd_devmap(pmd)) {
425 hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
426 hmm_vma_walk->pgmap);
427 if (unlikely(!hmm_vma_walk->pgmap))
428 return -EBUSY;
429 }
430 pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
431 }
432 if (hmm_vma_walk->pgmap) {
433 put_dev_pagemap(hmm_vma_walk->pgmap);
434 hmm_vma_walk->pgmap = NULL;
435 }
436 hmm_vma_walk->last = end;
437 return 0;
438 }
439 #else
440 /* stub to allow the code below to compile */
441 int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
442 unsigned long end, uint64_t *pfns, pmd_t pmd);
443 #endif
444
445 static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
446 {
447 if (pte_none(pte) || !pte_present(pte) || pte_protnone(pte))
448 return 0;
449 return pte_write(pte) ? range->flags[HMM_PFN_VALID] |
450 range->flags[HMM_PFN_WRITE] :
451 range->flags[HMM_PFN_VALID];
452 }
453
454 static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
455 unsigned long end, pmd_t *pmdp, pte_t *ptep,
456 uint64_t *pfn)
457 {
458 struct hmm_vma_walk *hmm_vma_walk = walk->private;
459 struct hmm_range *range = hmm_vma_walk->range;
460 bool fault, write_fault;
461 uint64_t cpu_flags;
462 pte_t pte = *ptep;
463 uint64_t orig_pfn = *pfn;
464
465 *pfn = range->values[HMM_PFN_NONE];
466 fault = write_fault = false;
467
468 if (pte_none(pte)) {
469 hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0,
470 &fault, &write_fault);
471 if (fault || write_fault)
472 goto fault;
473 return 0;
474 }
475
476 if (!pte_present(pte)) {
477 swp_entry_t entry = pte_to_swp_entry(pte);
478
479 if (!non_swap_entry(entry)) {
480 cpu_flags = pte_to_hmm_pfn_flags(range, pte);
481 hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
482 &fault, &write_fault);
483 if (fault || write_fault)
484 goto fault;
485 return 0;
486 }
487
488 /*
489 * This is a special swap entry: handle device private and migration
490 * entries below, and report anything else as an error.
491 */
492 if (is_device_private_entry(entry)) {
493 cpu_flags = range->flags[HMM_PFN_VALID] |
494 range->flags[HMM_PFN_DEVICE_PRIVATE];
495 cpu_flags |= is_write_device_private_entry(entry) ?
496 range->flags[HMM_PFN_WRITE] : 0;
497 hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
498 &fault, &write_fault);
499 if (fault || write_fault)
500 goto fault;
501 *pfn = hmm_device_entry_from_pfn(range,
502 swp_offset(entry));
503 *pfn |= cpu_flags;
504 return 0;
505 }
506
507 if (is_migration_entry(entry)) {
508 if (fault || write_fault) {
509 pte_unmap(ptep);
510 hmm_vma_walk->last = addr;
511 migration_entry_wait(walk->mm, pmdp, addr);
512 return -EBUSY;
513 }
514 return 0;
515 }
516
517 /* Report an error for everything else */
518 *pfn = range->values[HMM_PFN_ERROR];
519 return -EFAULT;
520 } else {
521 cpu_flags = pte_to_hmm_pfn_flags(range, pte);
522 hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
523 &fault, &write_fault);
524 }
525
526 if (fault || write_fault)
527 goto fault;
528
529 if (pte_devmap(pte)) {
530 hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte),
531 hmm_vma_walk->pgmap);
532 if (unlikely(!hmm_vma_walk->pgmap))
533 return -EBUSY;
534 } else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) {
535 *pfn = range->values[HMM_PFN_SPECIAL];
536 return -EFAULT;
537 }
538
539 *pfn = hmm_device_entry_from_pfn(range, pte_pfn(pte)) | cpu_flags;
540 return 0;
541
542 fault:
543 if (hmm_vma_walk->pgmap) {
544 put_dev_pagemap(hmm_vma_walk->pgmap);
545 hmm_vma_walk->pgmap = NULL;
546 }
547 pte_unmap(ptep);
548
549 return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
550 }
551
552 static int hmm_vma_walk_pmd(pmd_t *pmdp,
553 unsigned long start,
554 unsigned long end,
555 struct mm_walk *walk)
556 {
557 struct hmm_vma_walk *hmm_vma_walk = walk->private;
558 struct hmm_range *range = hmm_vma_walk->range;
559 uint64_t *pfns = range->pfns;
560 unsigned long addr = start, i;
561 pte_t *ptep;
562 pmd_t pmd;
563
564 again:
565 pmd = READ_ONCE(*pmdp);
566 if (pmd_none(pmd))
567 return hmm_vma_walk_hole(start, end, walk);
568
569 if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
570 bool fault, write_fault;
571 unsigned long npages;
572 uint64_t *pfns;
573
574 i = (addr - range->start) >> PAGE_SHIFT;
575 npages = (end - addr) >> PAGE_SHIFT;
576 pfns = &range->pfns[i];
577
578 hmm_range_need_fault(hmm_vma_walk, pfns, npages,
579 0, &fault, &write_fault);
580 if (fault || write_fault) {
581 hmm_vma_walk->last = addr;
582 pmd_migration_entry_wait(walk->mm, pmdp);
583 return -EBUSY;
584 }
585 return 0;
586 } else if (!pmd_present(pmd))
587 return hmm_pfns_bad(start, end, walk);
588
589 if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
590 /*
591 * No need to take the pmd lock here: even if some other thread is
592 * splitting the huge pmd we will get that event through the
593 * mmu_notifier callback.
594 *
595 * So just read the pmd value again, check that it is still a
596 * transparent huge or device mapping, and compute the corresponding
597 * pfn values.
598 */
599 pmd = pmd_read_atomic(pmdp);
600 barrier();
601 if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
602 goto again;
603
604 i = (addr - range->start) >> PAGE_SHIFT;
605 return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
606 }
607
608 /*
609 * We have handled all the valid cases above, i.e. either none,
610 * migration, huge or transparent huge. At this point either it is a
611 * valid pmd entry pointing to a pte directory or it is a bad pmd that
612 * will not recover.
613 */
614 if (pmd_bad(pmd))
615 return hmm_pfns_bad(start, end, walk);
616
617 ptep = pte_offset_map(pmdp, addr);
618 i = (addr - range->start) >> PAGE_SHIFT;
619 for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
620 int r;
621
622 r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
623 if (r) {
624 /* hmm_vma_handle_pte() did unmap the pte directory */
625 hmm_vma_walk->last = addr;
626 return r;
627 }
628 }
629 if (hmm_vma_walk->pgmap) {
630 /*
631 * We do put_dev_pagemap() here and not in hmm_vma_handle_pte() so
632 * that we can leverage the get_dev_pagemap() optimization which
633 * will not re-take a reference on a pgmap if we already have
634 * one.
635 */
636 put_dev_pagemap(hmm_vma_walk->pgmap);
637 hmm_vma_walk->pgmap = NULL;
638 }
639 pte_unmap(ptep - 1);
640
641 hmm_vma_walk->last = addr;
642 return 0;
643 }
644
645 #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \
646 defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
647 static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud)
648 {
649 if (!pud_present(pud))
650 return 0;
651 return pud_write(pud) ? range->flags[HMM_PFN_VALID] |
652 range->flags[HMM_PFN_WRITE] :
653 range->flags[HMM_PFN_VALID];
654 }
655
656 static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
657 struct mm_walk *walk)
658 {
659 struct hmm_vma_walk *hmm_vma_walk = walk->private;
660 struct hmm_range *range = hmm_vma_walk->range;
661 unsigned long addr = start, next;
662 pmd_t *pmdp;
663 pud_t pud;
664 int ret;
665
666 again:
667 pud = READ_ONCE(*pudp);
668 if (pud_none(pud))
669 return hmm_vma_walk_hole(start, end, walk);
670
671 if (pud_huge(pud) && pud_devmap(pud)) {
672 unsigned long i, npages, pfn;
673 uint64_t *pfns, cpu_flags;
674 bool fault, write_fault;
675
676 if (!pud_present(pud))
677 return hmm_vma_walk_hole(start, end, walk);
678
679 i = (addr - range->start) >> PAGE_SHIFT;
680 npages = (end - addr) >> PAGE_SHIFT;
681 pfns = &range->pfns[i];
682
683 cpu_flags = pud_to_hmm_pfn_flags(range, pud);
684 hmm_range_need_fault(hmm_vma_walk, pfns, npages,
685 cpu_flags, &fault, &write_fault);
686 if (fault || write_fault)
687 return hmm_vma_walk_hole_(addr, end, fault,
688 write_fault, walk);
689
690 pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
691 for (i = 0; i < npages; ++i, ++pfn) {
692 hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
693 hmm_vma_walk->pgmap);
694 if (unlikely(!hmm_vma_walk->pgmap))
695 return -EBUSY;
696 pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
697 cpu_flags;
698 }
699 if (hmm_vma_walk->pgmap) {
700 put_dev_pagemap(hmm_vma_walk->pgmap);
701 hmm_vma_walk->pgmap = NULL;
702 }
703 hmm_vma_walk->last = end;
704 return 0;
705 }
706
707 split_huge_pud(walk->vma, pudp, addr);
708 if (pud_none(*pudp))
709 goto again;
710
711 pmdp = pmd_offset(pudp, addr);
712 do {
713 next = pmd_addr_end(addr, end);
714 ret = hmm_vma_walk_pmd(pmdp, addr, next, walk);
715 if (ret)
716 return ret;
717 } while (pmdp++, addr = next, addr != end);
718
719 return 0;
720 }
721 #else
722 #define hmm_vma_walk_pud NULL
723 #endif
724
725 #ifdef CONFIG_HUGETLB_PAGE
726 static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
727 unsigned long start, unsigned long end,
728 struct mm_walk *walk)
729 {
730 unsigned long addr = start, i, pfn;
731 struct hmm_vma_walk *hmm_vma_walk = walk->private;
732 struct hmm_range *range = hmm_vma_walk->range;
733 struct vm_area_struct *vma = walk->vma;
734 uint64_t orig_pfn, cpu_flags;
735 bool fault, write_fault;
736 spinlock_t *ptl;
737 pte_t entry;
738 int ret = 0;
739
740 ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
741 entry = huge_ptep_get(pte);
742
743 i = (start - range->start) >> PAGE_SHIFT;
744 orig_pfn = range->pfns[i];
745 range->pfns[i] = range->values[HMM_PFN_NONE];
746 cpu_flags = pte_to_hmm_pfn_flags(range, entry);
747 fault = write_fault = false;
748 hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
749 &fault, &write_fault);
750 if (fault || write_fault) {
751 ret = -ENOENT;
752 goto unlock;
753 }
754
755 pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
756 for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
757 range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
758 cpu_flags;
759 hmm_vma_walk->last = end;
760
761 unlock:
762 spin_unlock(ptl);
763
764 if (ret == -ENOENT)
765 return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
766
767 return ret;
768 }
769 #else
770 #define hmm_vma_walk_hugetlb_entry NULL
771 #endif
772
773 static void hmm_pfns_clear(struct hmm_range *range,
774 uint64_t *pfns,
775 unsigned long addr,
776 unsigned long end)
777 {
778 for (; addr < end; addr += PAGE_SIZE, pfns++)
779 *pfns = range->values[HMM_PFN_NONE];
780 }
781
782 /*
783 * hmm_range_register() - start tracking changes to the CPU page table for a range
784 * @range: range to track, with start/end, pfns, flags and values filled in
785 * @mirror: mirror the range is registered against
786 *
787 * Return: 0 on success, -EINVAL for a misaligned or empty range, -EFAULT if
788 * the mm is no longer alive.
789 * See include/linux/hmm.h for how the range is used afterwards.
790 */
791 int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror)
792 {
793 struct hmm *hmm = mirror->hmm;
794 unsigned long flags;
795
796 range->valid = false;
797 range->hmm = NULL;
798
799 if ((range->start & (PAGE_SIZE - 1)) || (range->end & (PAGE_SIZE - 1)))
800 return -EINVAL;
801 if (range->start >= range->end)
802 return -EINVAL;
803
804 /* Prevent hmm_release() from running while the range is valid */
805 if (!mmget_not_zero(hmm->mmu_notifier.mm))
806 return -EFAULT;
807
808 /* Initialize range to track CPU page table updates. */
809 spin_lock_irqsave(&hmm->ranges_lock, flags);
810
811 range->hmm = hmm;
812 list_add(&range->list, &hmm->ranges);
813
814 /*
815 * If a notifier is currently running the range starts out invalid; it is
816 * marked valid again by notifiers_decrement() once the last one finishes.
817 */
818 if (!hmm->notifiers)
819 range->valid = true;
820 spin_unlock_irqrestore(&hmm->ranges_lock, flags);
821
822 return 0;
823 }
824 EXPORT_SYMBOL(hmm_range_register);
825
826 /*
827 * hmm_range_unregister() - stop tracking changes to the CPU page table for a range
828 * @range: range to unregister
829 *
830 * The range struct is used to track updates to the CPU page table after a
831 * call to hmm_range_register(). See include/linux/hmm.h for how to use it.
832 */
833 void hmm_range_unregister(struct hmm_range *range)
834 {
835 struct hmm *hmm = range->hmm;
836 unsigned long flags;
837
838 spin_lock_irqsave(&hmm->ranges_lock, flags);
839 list_del_init(&range->list);
840 spin_unlock_irqrestore(&hmm->ranges_lock, flags);
841
842 /* Drop the reference taken by hmm_range_register() */
843 mmput(hmm->mmu_notifier.mm);
844
845 /*
846 * The range is now invalid and the ref on the hmm is dropped, so
847 * poison the pointer. Leave other fields in place, for the caller's
848 * use.
849 */
850 range->valid = false;
851 memset(&range->hmm, POISON_INUSE, sizeof(range->hmm));
852 }
853 EXPORT_SYMBOL(hmm_range_unregister);
854
855 static const struct mm_walk_ops hmm_walk_ops = {
856 .pud_entry = hmm_vma_walk_pud,
857 .pmd_entry = hmm_vma_walk_pmd,
858 .pte_hole = hmm_vma_walk_hole,
859 .hugetlb_entry = hmm_vma_walk_hugetlb_entry,
860 };
861
862 /**
863 * hmm_range_fault() - try to fault some addresses in a virtual address range
864 * @range: range being faulted
865 * @flags: HMM_FAULT_* flags (HMM_FAULT_SNAPSHOT, HMM_FAULT_ALLOW_RETRY)
866 *
867 * Return: the number of valid pages in range->pfns[] (from range start
868 * address), which may be zero. On error one of the following status codes
869 * can be returned:
870 *
871 * -EBUSY: the range has been invalidated and the caller needs to wait for
872 * the invalidation to finish before retrying.
873 * -EFAULT: no valid vma covers part of the range, the vma is a device vma
874 * (VM_IO/VM_PFNMAP/VM_MIXEDMAP), or faulting a page failed.
875 * -EPERM: invalid permission (for example asking for write on a read-only
876 * vma).
877 * -EAGAIN: a page fault needed to be retried and mmap_sem was dropped.
878 *
879 * This is similar to a regular CPU page fault except that it will not trigger
880 * any memory migration if the memory being faulted is not accessible by CPUs
881 * and the caller does not ask for migration.
882 *
883 * On error the function marks the corresponding HMM pfn entry with an error
884 * value for the virtual address that failed.
885 *
886 * The caller must hold the mmap_sem of range->hmm's mm for read, and the
887 * range must have been registered with hmm_range_register() and still be
888 * valid when the walk starts.
889 */
890 long hmm_range_fault(struct hmm_range *range, unsigned int flags)
891 {
892 const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
893 unsigned long start = range->start, end;
894 struct hmm_vma_walk hmm_vma_walk;
895 struct hmm *hmm = range->hmm;
896 struct vm_area_struct *vma;
897 int ret;
898
899 lockdep_assert_held(&hmm->mmu_notifier.mm->mmap_sem);
900
901 do {
902 /* If range is no longer valid force retry. */
903 if (!range->valid)
904 return -EBUSY;
905
906 vma = find_vma(hmm->mmu_notifier.mm, start);
907 if (vma == NULL || (vma->vm_flags & device_vma))
908 return -EFAULT;
909
910 if (!(vma->vm_flags & VM_READ)) {
911 /*
912 * If the vma does not allow read access, then assume that it does
913 * not allow write access either. HMM does not support
914 * architectures that allow write without read access.
915 */
916 hmm_pfns_clear(range, range->pfns,
917 range->start, range->end);
918 return -EPERM;
919 }
920
921 hmm_vma_walk.pgmap = NULL;
922 hmm_vma_walk.last = start;
923 hmm_vma_walk.flags = flags;
924 hmm_vma_walk.range = range;
925 end = min(range->end, vma->vm_end);
926
927 walk_page_range(vma->vm_mm, start, end, &hmm_walk_ops,
928 &hmm_vma_walk);
929
930 do {
931 ret = walk_page_range(vma->vm_mm, start, end,
932 &hmm_walk_ops, &hmm_vma_walk);
933 start = hmm_vma_walk.last;
934
935 /* Keep trying while the range is valid. */
936 } while (ret == -EBUSY && range->valid);
937
938 if (ret) {
939 unsigned long i;
940
941 i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
942 hmm_pfns_clear(range, &range->pfns[i],
943 hmm_vma_walk.last, range->end);
944 return ret;
945 }
946 start = end;
947
948 } while (start < range->end);
949
950 return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
951 }
952 EXPORT_SYMBOL(hmm_range_fault);
953
954 /*
955 * hmm_range_dma_map() - hmm_range_fault() and dma map pages all in one.
956 * @range: range being faulted
957 * @device: device against which to map the pages
958 * @daddrs: array of dma addresses, one per page in the range
959 * @flags: HMM_FAULT_* flags forwarded to hmm_range_fault()
960 * Return: the number of pages mapped on success (which may be zero), -EBUSY
961 * if the range was invalidated, -EFAULT on a dma mapping error, or any
962 * error returned by hmm_range_fault().
963 */
964 long hmm_range_dma_map(struct hmm_range *range, struct device *device,
965 dma_addr_t *daddrs, unsigned int flags)
966 {
967 unsigned long i, npages, mapped;
968 long ret;
969
970 ret = hmm_range_fault(range, flags);
971 if (ret <= 0)
972 return ret ? ret : -EBUSY;
973
974 npages = (range->end - range->start) >> PAGE_SHIFT;
975 for (i = 0, mapped = 0; i < npages; ++i) {
976 enum dma_data_direction dir = DMA_TO_DEVICE;
977 struct page *page;
978
979 /*
980 * Clear the dma address first: entries which do not get mapped
981 * below (holes, error or special pages, for which
982 * hmm_device_entry_to_page() returns NULL) are skipped, and
983 * clearing them here leaves those slots with a well-defined value
984 * instead of stale data for the caller and for the unmap path.
985 */
986
987 daddrs[i] = 0;
988
989 page = hmm_device_entry_to_page(range, range->pfns[i]);
990 if (page == NULL)
991 continue;
992
993 /* Check if the range is being invalidated */
994 if (!range->valid) {
995 ret = -EBUSY;
996 goto unmap;
997 }
998
999 /* If it is read and write then map it bi-directional */
1000 if (range->pfns[i] & range->flags[HMM_PFN_WRITE])
1001 dir = DMA_BIDIRECTIONAL;
1002
1003 daddrs[i] = dma_map_page(device, page, 0, PAGE_SIZE, dir);
1004 if (dma_mapping_error(device, daddrs[i])) {
1005 ret = -EFAULT;
1006 goto unmap;
1007 }
1008
1009 mapped++;
1010 }
1011
1012 return mapped;
1013
1014 unmap:
1015 for (npages = i, i = 0; (i < npages) && mapped; ++i) {
1016 enum dma_data_direction dir = DMA_TO_DEVICE;
1017 struct page *page;
1018
1019 page = hmm_device_entry_to_page(range, range->pfns[i]);
1020 if (page == NULL)
1021 continue;
1022
1023 if (dma_mapping_error(device, daddrs[i]))
1024 continue;
1025
1026 /* If it was read and write then it was mapped bi-directional */
1027 if (range->pfns[i] & range->flags[HMM_PFN_WRITE])
1028 dir = DMA_BIDIRECTIONAL;
1029
1030 dma_unmap_page(device, daddrs[i], PAGE_SIZE, dir);
1031 mapped--;
1032 }
1033
1034 return ret;
1035 }
1036 EXPORT_SYMBOL(hmm_range_dma_map);
1037
1038 /*
1039 * hmm_range_dma_unmap() - unmap a range mapped with hmm_range_dma_map()
1040 * @range: range being unmapped
1041 * @device: device against which the dma mapping was done
1042 * @daddrs: dma addresses of the mapped pages
1043 * @dirty: set the pages dirty if they were mapped writable and may have
1044 * been written to by the device
1045 * Return: the number of pages unmapped on success, -EINVAL otherwise.
1046 *
1047 * The caller must serialize against page table updates and mmu notifier
1048 * callbacks (for instance through the HMM mirror) so that the device no
1049 * longer uses a mapping while it is being torn down.
1050 */
1051 long hmm_range_dma_unmap(struct hmm_range *range,
1052 struct device *device,
1053 dma_addr_t *daddrs,
1054 bool dirty)
1055 {
1056 unsigned long i, npages;
1057 long cpages = 0;
1058
1059 /* Sanity check */
1060 if (range->end <= range->start)
1061 return -EINVAL;
1062 if (!daddrs)
1063 return -EINVAL;
1064 if (!range->pfns)
1065 return -EINVAL;
1066
1067 npages = (range->end - range->start) >> PAGE_SHIFT;
1068 for (i = 0; i < npages; ++i) {
1069 enum dma_data_direction dir = DMA_TO_DEVICE;
1070 struct page *page;
1071
1072 page = hmm_device_entry_to_page(range, range->pfns[i]);
1073 if (page == NULL)
1074 continue;
1075
1076 /* If it was read and write then it was mapped bi-directional */
1077 if (range->pfns[i] & range->flags[HMM_PFN_WRITE]) {
1078 dir = DMA_BIDIRECTIONAL;
1079 /*
1080 * If requested, mark the page dirty: the device may have written
1081 * to it through the DMA mapping without the CPU page table
1082 * noticing.
1083 */
1084 if (dirty)
1085 set_page_dirty(page);
1086 }
1087
1088 /* Unmap the page and clear the pfn and dma address */
1089 dma_unmap_page(device, daddrs[i], PAGE_SIZE, dir);
1090 range->pfns[i] = range->values[HMM_PFN_NONE];
1091
1092 daddrs[i] = 0;
1093 cpages++;
1094 }
1095
1096 return cpages;
1097 }
1098 EXPORT_SYMBOL(hmm_range_dma_unmap);