This source file includes the following definitions:
- pe_order
- init_dax_wait_table
- dax_to_pfn
- dax_make_entry
- dax_is_locked
- dax_entry_order
- dax_is_pmd_entry
- dax_is_pte_entry
- dax_is_zero_entry
- dax_is_empty_entry
- dax_is_conflict
- dax_entry_waitqueue
- wake_exceptional_entry_func
- dax_wake_entry
- get_unlocked_entry
- wait_entry_unlocked
- put_unlocked_entry
- dax_unlock_entry
- dax_lock_entry
- dax_entry_size
- dax_end_pfn
- dax_associate_entry
- dax_disassociate_entry
- dax_busy_page
- dax_lock_page
- dax_unlock_page
- grab_mapping_entry
- dax_layout_busy_page
- __dax_invalidate_entry
- dax_delete_mapping_entry
- dax_invalidate_mapping_entry_sync
- copy_user_dax
- dax_insert_entry
- pgoff_address
- dax_entry_mkclean
- dax_writeback_one
- dax_writeback_mapping_range
- dax_iomap_sector
- dax_iomap_pfn
- dax_load_hole
- dax_range_is_aligned
- __dax_zero_page_range
- dax_iomap_actor
- dax_iomap_rw
- dax_fault_return
- dax_fault_is_synchronous
- dax_iomap_pte_fault
- dax_pmd_load_hole
- dax_iomap_pmd_fault
- dax_iomap_pmd_fault
- dax_iomap_fault
- dax_insert_pfn_mkwrite
- dax_finish_sync_fault
9 #include <linux/atomic.h>
10 #include <linux/blkdev.h>
11 #include <linux/buffer_head.h>
12 #include <linux/dax.h>
13 #include <linux/fs.h>
14 #include <linux/genhd.h>
15 #include <linux/highmem.h>
16 #include <linux/memcontrol.h>
17 #include <linux/mm.h>
18 #include <linux/mutex.h>
19 #include <linux/pagevec.h>
20 #include <linux/sched.h>
21 #include <linux/sched/signal.h>
22 #include <linux/uio.h>
23 #include <linux/vmstat.h>
24 #include <linux/pfn_t.h>
25 #include <linux/sizes.h>
26 #include <linux/mmu_notifier.h>
27 #include <linux/iomap.h>
28 #include <asm/pgalloc.h>
29
30 #define CREATE_TRACE_POINTS
31 #include <trace/events/fs_dax.h>
32
33 static inline unsigned int pe_order(enum page_entry_size pe_size)
34 {
35 if (pe_size == PE_SIZE_PTE)
36 return PAGE_SHIFT - PAGE_SHIFT;
37 if (pe_size == PE_SIZE_PMD)
38 return PMD_SHIFT - PAGE_SHIFT;
39 if (pe_size == PE_SIZE_PUD)
40 return PUD_SHIFT - PAGE_SHIFT;
41 return ~0;
42 }
43
44
45 #define DAX_WAIT_TABLE_BITS 12
46 #define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
47
48
49 #define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
50 #define PG_PMD_NR (PMD_SIZE >> PAGE_SHIFT)
51
52
53 #define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT)
54
55 static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];
56
57 static int __init init_dax_wait_table(void)
58 {
59 int i;
60
61 for (i = 0; i < DAX_WAIT_TABLE_ENTRIES; i++)
62 init_waitqueue_head(wait_table + i);
63 return 0;
64 }
65 fs_initcall(init_dax_wait_table);
66
67 /*
68  * DAX page cache entries are stored in the mapping's XArray as value
69  * entries rather than struct page pointers.  The low four bits carry
70  * state: one bit for locking, one to mark a PMD-sized entry, and two
71  * to mark zero-page and empty (lock-only) entries; the remaining bits
72  * hold the pfn of the first backing page.
73  *
74  * If the PMD bit is clear the entry covers PAGE_SIZE; if neither the
75  * ZERO_PAGE nor the EMPTY bit is set, the entry maps an allocated block.
76  */
77 #define DAX_SHIFT (4)
78 #define DAX_LOCKED (1UL << 0)
79 #define DAX_PMD (1UL << 1)
80 #define DAX_ZERO_PAGE (1UL << 2)
81 #define DAX_EMPTY (1UL << 3)
82
83 static unsigned long dax_to_pfn(void *entry)
84 {
85 return xa_to_value(entry) >> DAX_SHIFT;
86 }
87
88 static void *dax_make_entry(pfn_t pfn, unsigned long flags)
89 {
90 return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
91 }
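/*
 * Worked example (illustrative, not part of the original source): for a
 * PMD entry backing pfn 0x1234, dax_make_entry() builds
 * xa_mk_value(DAX_PMD | (0x1234 << DAX_SHIFT)), i.e. the value 0x12342.
 * dax_to_pfn() shifts the four flag bits back out to recover 0x1234,
 * dax_is_pmd_entry() tests bit 1 and dax_is_locked() tests bit 0.
 */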
92
93 static bool dax_is_locked(void *entry)
94 {
95 return xa_to_value(entry) & DAX_LOCKED;
96 }
97
98 static unsigned int dax_entry_order(void *entry)
99 {
100 if (xa_to_value(entry) & DAX_PMD)
101 return PMD_ORDER;
102 return 0;
103 }
104
105 static unsigned long dax_is_pmd_entry(void *entry)
106 {
107 return xa_to_value(entry) & DAX_PMD;
108 }
109
110 static bool dax_is_pte_entry(void *entry)
111 {
112 return !(xa_to_value(entry) & DAX_PMD);
113 }
114
115 static int dax_is_zero_entry(void *entry)
116 {
117 return xa_to_value(entry) & DAX_ZERO_PAGE;
118 }
119
120 static int dax_is_empty_entry(void *entry)
121 {
122 return xa_to_value(entry) & DAX_EMPTY;
123 }
124
125
126 /* True if the lookup found an entry of a smaller order than requested;
127  * get_unlocked_entry() signals this by returning XA_RETRY_ENTRY.
128  */
129 static bool dax_is_conflict(void *entry)
130 {
131 return entry == XA_RETRY_ENTRY;
132 }
133
134 /*
135  * DAX page cache entry locking: waiters hash into a global wait table.
136  */
137 struct exceptional_entry_key {
138 struct xarray *xa;
139 pgoff_t entry_start;
140 };
141
142 struct wait_exceptional_entry_queue {
143 wait_queue_entry_t wait;
144 struct exceptional_entry_key key;
145 };
146
147 static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
148 void *entry, struct exceptional_entry_key *key)
149 {
150 unsigned long hash;
151 unsigned long index = xas->xa_index;
152
153
154 /*
155  * If 'entry' is a PMD, align the index down to the start of the PMD so
156  * that PTE- and PMD-level waiters for that range use the same queue.
157  */
158 if (dax_is_pmd_entry(entry))
159 index &= ~PG_PMD_COLOUR;
160 key->xa = xas->xa;
161 key->entry_start = index;
162
163 hash = hash_long((unsigned long)xas->xa ^ index, DAX_WAIT_TABLE_BITS);
164 return wait_table + hash;
165 }
166
167 static int wake_exceptional_entry_func(wait_queue_entry_t *wait,
168 unsigned int mode, int sync, void *keyp)
169 {
170 struct exceptional_entry_key *key = keyp;
171 struct wait_exceptional_entry_queue *ewait =
172 container_of(wait, struct wait_exceptional_entry_queue, wait);
173
174 if (key->xa != ewait->key.xa ||
175 key->entry_start != ewait->key.entry_start)
176 return 0;
177 return autoremove_wake_function(wait, mode, sync, NULL);
178 }
179
180 /*
181  * Wake up waiters for @entry.  @entry may no longer be the entry at this
182  * index in the mapping; what matters is only whether the index was
183  * covered by a PMD entry, as that determines which wait queue was used.
184  */
185 static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
186 {
187 struct exceptional_entry_key key;
188 wait_queue_head_t *wq;
189
190 wq = dax_entry_waitqueue(xas, entry, &key);
191
192
193
194
195
196
197
198 if (waitqueue_active(wq))
199 __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
200 }
201
202
203 /*
204  * Look up the entry at xas->xa_index and, if it is a locked DAX value
205  * entry, sleep until it is unlocked.  Must be called with the xa_state
206  * locked (irqs disabled); the lock is dropped while sleeping and
207  * reacquired before returning.
208  *
209  * Returns NULL if no entry exists, the (unlocked) entry if one does, or
210  * XA_RETRY_ENTRY if the entry found is of a smaller order than @order.
211  */
212 static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
213 {
214 void *entry;
215 struct wait_exceptional_entry_queue ewait;
216 wait_queue_head_t *wq;
217
218 init_wait(&ewait.wait);
219 ewait.wait.func = wake_exceptional_entry_func;
220
221 for (;;) {
222 entry = xas_find_conflict(xas);
223 if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
224 return entry;
225 if (dax_entry_order(entry) < order)
226 return XA_RETRY_ENTRY;
227 if (!dax_is_locked(entry))
228 return entry;
229
230 wq = dax_entry_waitqueue(xas, entry, &ewait.key);
231 prepare_to_wait_exclusive(wq, &ewait.wait,
232 TASK_UNINTERRUPTIBLE);
233 xas_unlock_irq(xas);
234 xas_reset(xas);
235 schedule();
236 finish_wait(wq, &ewait.wait);
237 xas_lock_irq(xas);
238 }
239 }
240
241
242 /*
243  * Wait for the given entry to become unlocked without retaking the XArray
244  * lock afterwards; used by dax_lock_page(), where the mapping may go away.
245  */
246 static void wait_entry_unlocked(struct xa_state *xas, void *entry)
247 {
248 struct wait_exceptional_entry_queue ewait;
249 wait_queue_head_t *wq;
250
251 init_wait(&ewait.wait);
252 ewait.wait.func = wake_exceptional_entry_func;
253
254 wq = dax_entry_waitqueue(xas, entry, &ewait.key);
255
256
257
258
259
260
261 prepare_to_wait(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
262 xas_unlock_irq(xas);
263 schedule();
264 finish_wait(wq, &ewait.wait);
265 }
266
267 static void put_unlocked_entry(struct xa_state *xas, void *entry)
268 {
269
270 if (entry && !dax_is_conflict(entry))
271 dax_wake_entry(xas, entry, false);
272 }
273
274
275 /*
276  * Store @entry back unlocked and wake one waiter.  The xa_state was used
277  * earlier and its lock dropped, so it is reset and relocked here.
278  */
279 static void dax_unlock_entry(struct xa_state *xas, void *entry)
280 {
281 void *old;
282
283 BUG_ON(dax_is_locked(entry));
284 xas_reset(xas);
285 xas_lock_irq(xas);
286 old = xas_store(xas, entry);
287 xas_unlock_irq(xas);
288 BUG_ON(!dax_is_locked(old));
289 dax_wake_entry(xas, entry, false);
290 }
291
292 /*
293  * Return: the entry stored at this location before it was locked.
294  */
295 static void *dax_lock_entry(struct xa_state *xas, void *entry)
296 {
297 unsigned long v = xa_to_value(entry);
298 return xas_store(xas, xa_mk_value(v | DAX_LOCKED));
299 }
300
301 static unsigned long dax_entry_size(void *entry)
302 {
303 if (dax_is_zero_entry(entry))
304 return 0;
305 else if (dax_is_empty_entry(entry))
306 return 0;
307 else if (dax_is_pmd_entry(entry))
308 return PMD_SIZE;
309 else
310 return PAGE_SIZE;
311 }
312
313 static unsigned long dax_end_pfn(void *entry)
314 {
315 return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
316 }
317
318 /*
319  * Iterate over every pfn backing a DAX entry: one page for a PTE entry,
320  * PG_PMD_NR pages for a PMD entry; zero and empty entries yield none.
321  */
322 #define for_each_mapped_pfn(entry, pfn) \
323 for (pfn = dax_to_pfn(entry); \
324 pfn < dax_end_pfn(entry); pfn++)
325
326
327 /*
328  * Associate the pages backing @entry with @mapping by filling in their
329  * page->mapping and page->index fields (a no-op for FS_DAX_LIMITED).
330  */
331 static void dax_associate_entry(void *entry, struct address_space *mapping,
332 struct vm_area_struct *vma, unsigned long address)
333 {
334 unsigned long size = dax_entry_size(entry), pfn, index;
335 int i = 0;
336
337 if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
338 return;
339
340 index = linear_page_index(vma, address & ~(size - 1));
341 for_each_mapped_pfn(entry, pfn) {
342 struct page *page = pfn_to_page(pfn);
343
344 WARN_ON_ONCE(page->mapping);
345 page->mapping = mapping;
346 page->index = index + i++;
347 }
348 }
349
350 static void dax_disassociate_entry(void *entry, struct address_space *mapping,
351 bool trunc)
352 {
353 unsigned long pfn;
354
355 if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
356 return;
357
358 for_each_mapped_pfn(entry, pfn) {
359 struct page *page = pfn_to_page(pfn);
360
361 WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
362 WARN_ON_ONCE(page->mapping && page->mapping != mapping);
363 page->mapping = NULL;
364 page->index = 0;
365 }
366 }
367
368 static struct page *dax_busy_page(void *entry)
369 {
370 unsigned long pfn;
371
372 for_each_mapped_pfn(entry, pfn) {
373 struct page *page = pfn_to_page(pfn);
374
375 if (page_ref_count(page) > 1)
376 return page;
377 }
378 return NULL;
379 }
380
381
382 /**
383  * dax_lock_page - lock the DAX entry corresponding to a page
384  * @page: the page whose entry we want to lock
385  *
386  * Return: a cookie to pass to dax_unlock_page(), or 0 if the entry
387  * could not be locked.
388  */
389 dax_entry_t dax_lock_page(struct page *page)
390 {
391 XA_STATE(xas, NULL, 0);
392 void *entry;
393
394
395 rcu_read_lock();
396 for (;;) {
397 struct address_space *mapping = READ_ONCE(page->mapping);
398
399 entry = NULL;
400 if (!mapping || !dax_mapping(mapping))
401 break;
402
403
404
405
406
407
408
409
410 entry = (void *)~0UL;
411 if (S_ISCHR(mapping->host->i_mode))
412 break;
413
414 xas.xa = &mapping->i_pages;
415 xas_lock_irq(&xas);
416 if (mapping != page->mapping) {
417 xas_unlock_irq(&xas);
418 continue;
419 }
420 xas_set(&xas, page->index);
421 entry = xas_load(&xas);
422 if (dax_is_locked(entry)) {
423 rcu_read_unlock();
424 wait_entry_unlocked(&xas, entry);
425 rcu_read_lock();
426 continue;
427 }
428 dax_lock_entry(&xas, entry);
429 xas_unlock_irq(&xas);
430 break;
431 }
432 rcu_read_unlock();
433 return (dax_entry_t)entry;
434 }
435
436 void dax_unlock_page(struct page *page, dax_entry_t cookie)
437 {
438 struct address_space *mapping = page->mapping;
439 XA_STATE(xas, &mapping->i_pages, page->index);
440
441 if (S_ISCHR(mapping->host->i_mode))
442 return;
443
444 dax_unlock_entry(&xas, (void *)cookie);
445 }
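/*
 * Hedged usage sketch (not taken verbatim from any caller; the function
 * name is illustrative): memory-poison handling is the kind of user that
 * dax_lock_page()/dax_unlock_page() exist for.  Locking the entry keeps
 * the page's mapping and index stable while the pfn is dealt with.
 */
static int example_handle_poisoned_dax_page(struct page *page)
{
	dax_entry_t cookie;

	cookie = dax_lock_page(page);
	if (!cookie)
		return -EBUSY;	/* page no longer belongs to a DAX mapping */

	/* ... act on page->mapping / page->index while the entry is held ... */

	dax_unlock_page(page, cookie);
	return 0;
}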
446
447
448 /*
449  * Find the page cache entry at the index in @xas.  If it is a DAX value
450  * entry, return it locked.  If no entry exists, insert a locked empty
451  * entry instead.
452  *
453  * When a PMD-sized entry is requested (@order == PMD_ORDER) this either
454  * returns a locked PMD entry or VM_FAULT_FALLBACK, the latter whenever
455  * any PTE entry already exists within the requested PMD range.
456  *
457  * PTE entries are always favoured over PMD entries: there is no path
458  * that evicts PTE entries in order to upgrade them to a PMD.  A PTE
459  * request that finds a zero-page or empty PMD entry unmaps that range
460  * and downgrades the entry to PTE granularity.  PMD entries with real
461  * storage behind them are left alone; PTE writes simply dirty the whole
462  * PMD entry.
463  *
464  * If the entry needs to be downgraded it is locked first, the PMD range
465  * is unmapped (for zero-page entries), and the old entry is removed
466  * before a fresh PTE-sized empty entry is created.
467  *
468  * Memory allocation for a new entry may require dropping the lock; in
469  * that case the lookup is retried from the start.
470  *
471  * Unlike filemap_fault(), FAULT_FLAG_RETRY is not honoured here.
472  *
473  * On error this function does not return an ERR_PTR but a VM_FAULT code
474  * encoded as an xarray internal entry.
475  */
476 static void *grab_mapping_entry(struct xa_state *xas,
477 struct address_space *mapping, unsigned int order)
478 {
479 unsigned long index = xas->xa_index;
480 bool pmd_downgrade = false;
481 void *entry;
482
483 retry:
484 xas_lock_irq(xas);
485 entry = get_unlocked_entry(xas, order);
486
487 if (entry) {
488 if (dax_is_conflict(entry))
489 goto fallback;
490 if (!xa_is_value(entry)) {
491 xas_set_err(xas, EIO);
492 goto out_unlock;
493 }
494
495 if (order == 0) {
496 if (dax_is_pmd_entry(entry) &&
497 (dax_is_zero_entry(entry) ||
498 dax_is_empty_entry(entry))) {
499 pmd_downgrade = true;
500 }
501 }
502 }
503
504 if (pmd_downgrade) {
505
506
507
508
509 dax_lock_entry(xas, entry);
510
511
512
513
514
515
516 if (dax_is_zero_entry(entry)) {
517 xas_unlock_irq(xas);
518 unmap_mapping_pages(mapping,
519 xas->xa_index & ~PG_PMD_COLOUR,
520 PG_PMD_NR, false);
521 xas_reset(xas);
522 xas_lock_irq(xas);
523 }
524
525 dax_disassociate_entry(entry, mapping, false);
526 xas_store(xas, NULL);
527 dax_wake_entry(xas, entry, true);
528 mapping->nrexceptional--;
529 entry = NULL;
530 xas_set(xas, index);
531 }
532
533 if (entry) {
534 dax_lock_entry(xas, entry);
535 } else {
536 unsigned long flags = DAX_EMPTY;
537
538 if (order > 0)
539 flags |= DAX_PMD;
540 entry = dax_make_entry(pfn_to_pfn_t(0), flags);
541 dax_lock_entry(xas, entry);
542 if (xas_error(xas))
543 goto out_unlock;
544 mapping->nrexceptional++;
545 }
546
547 out_unlock:
548 xas_unlock_irq(xas);
549 if (xas_nomem(xas, mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM))
550 goto retry;
551 if (xas->xa_node == XA_ERROR(-ENOMEM))
552 return xa_mk_internal(VM_FAULT_OOM);
553 if (xas_error(xas))
554 return xa_mk_internal(VM_FAULT_SIGBUS);
555 return entry;
556 fallback:
557 xas_unlock_irq(xas);
558 return xa_mk_internal(VM_FAULT_FALLBACK);
559 }
560
561
562 /**
563  * dax_layout_busy_page - find the first pinned page in @mapping
564  * @mapping: address space to scan for pages with an elevated refcount
565  *
566  * DAX pages are considered idle when their refcount is 1; a higher count
567  * means the page is pinned, e.g. for DMA via get_user_pages().  A
568  * filesystem uses this helper to find out whether any page in the
569  * mapping is still busy before it changes the file's block layout.
570  *
571  * The caller must hold locks that prevent new mappings from being
572  * established, so the unmap below cannot race with new page table refs.
573  *
574  * Return: the first busy page found, or NULL if none.
575  */
576 struct page *dax_layout_busy_page(struct address_space *mapping)
577 {
578 XA_STATE(xas, &mapping->i_pages, 0);
579 void *entry;
580 unsigned int scanned = 0;
581 struct page *page = NULL;
582
583
584
585
586 if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
587 return NULL;
588
589 if (!dax_mapping(mapping) || !mapping_mapped(mapping))
590 return NULL;
591
592
593 /*
594  * Unmap everything first so that any page still referenced afterwards
595  * must have its reference pinned by something other than a mapping.
596  * If this races with get_user_pages_fast(), either the elevated
597  * refcount is seen in the scan below and we wait for it, or GUP-fast
598  * notices the page is no longer mapped and falls back to the slow
599  * path, which takes the pte/pmd locks.  New references cannot be
600  * taken without those locks, and unmap_mapping_range() clears the
601  * pte/pmd under the same locks, so we either observe a new reference
602  * or prevent it from being established.
603  */
604 unmap_mapping_range(mapping, 0, 0, 0);
605
606 xas_lock_irq(&xas);
607 xas_for_each(&xas, entry, ULONG_MAX) {
608 if (WARN_ON_ONCE(!xa_is_value(entry)))
609 continue;
610 if (unlikely(dax_is_locked(entry)))
611 entry = get_unlocked_entry(&xas, 0);
612 if (entry)
613 page = dax_busy_page(entry);
614 put_unlocked_entry(&xas, entry);
615 if (page)
616 break;
617 if (++scanned % XA_CHECK_SCHED)
618 continue;
619
620 xas_pause(&xas);
621 xas_unlock_irq(&xas);
622 cond_resched();
623 xas_lock_irq(&xas);
624 }
625 xas_unlock_irq(&xas);
626 return page;
627 }
628 EXPORT_SYMBOL_GPL(dax_layout_busy_page);
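/*
 * Hedged usage sketch (modelled loosely on how filesystems break DAX
 * layouts before truncate or hole punch; the helper name is illustrative):
 * look for a busy page and, if one exists, sleep until its refcount drops
 * back to 1 and tell the caller to retry.
 */
static int example_break_dax_layouts(struct inode *inode, bool *retry)
{
	struct page *page;

	page = dax_layout_busy_page(inode->i_mapping);
	if (!page)
		return 0;

	*retry = true;
	return ___wait_var_event(&page->_refcount,
			atomic_read(&page->_refcount) == 1,
			TASK_INTERRUPTIBLE, 0, 0, schedule());
}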
629
630 static int __dax_invalidate_entry(struct address_space *mapping,
631 pgoff_t index, bool trunc)
632 {
633 XA_STATE(xas, &mapping->i_pages, index);
634 int ret = 0;
635 void *entry;
636
637 xas_lock_irq(&xas);
638 entry = get_unlocked_entry(&xas, 0);
639 if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
640 goto out;
641 if (!trunc &&
642 (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY) ||
643 xas_get_mark(&xas, PAGECACHE_TAG_TOWRITE)))
644 goto out;
645 dax_disassociate_entry(entry, mapping, trunc);
646 xas_store(&xas, NULL);
647 mapping->nrexceptional--;
648 ret = 1;
649 out:
650 put_unlocked_entry(&xas, entry);
651 xas_unlock_irq(&xas);
652 return ret;
653 }
654
655
656 /*
657  * Delete the DAX entry at @index from @mapping, waiting for it to unlock.
658  */
659 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
660 {
661 int ret = __dax_invalidate_entry(mapping, index, true);
662
663
664
665
666
667
668
669
670 WARN_ON_ONCE(!ret);
671 return ret;
672 }
673
674 /*
675  * Invalidate the DAX entry at @index only if it is clean (not dirty/towrite).
676  */
677 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
678 pgoff_t index)
679 {
680 return __dax_invalidate_entry(mapping, index, false);
681 }
682
683 static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
684 sector_t sector, size_t size, struct page *to,
685 unsigned long vaddr)
686 {
687 void *vto, *kaddr;
688 pgoff_t pgoff;
689 long rc;
690 int id;
691
692 rc = bdev_dax_pgoff(bdev, sector, size, &pgoff);
693 if (rc)
694 return rc;
695
696 id = dax_read_lock();
697 rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, NULL);
698 if (rc < 0) {
699 dax_read_unlock(id);
700 return rc;
701 }
702 vto = kmap_atomic(to);
703 copy_user_page(vto, (void __force *)kaddr, vaddr, to);
704 kunmap_atomic(vto);
705 dax_read_unlock(id);
706 return 0;
707 }
708
709
710 /*
711  * Install a new entry for @pfn at the current index.  grab_mapping_entry()
712  * already gave us a locked entry of the right size; zero-page and empty
713  * entries are replaced, and stale zero-page mappings are unmapped first
714  * so that they cannot be seen after real storage is installed.
715  */
716 static void *dax_insert_entry(struct xa_state *xas,
717 struct address_space *mapping, struct vm_fault *vmf,
718 void *entry, pfn_t pfn, unsigned long flags, bool dirty)
719 {
720 void *new_entry = dax_make_entry(pfn, flags);
721
722 if (dirty)
723 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
724
725 if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
726 unsigned long index = xas->xa_index;
727
728 if (dax_is_pmd_entry(entry))
729 unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
730 PG_PMD_NR, false);
731 else
732 unmap_mapping_pages(mapping, index, 1, false);
733 }
734
735 xas_reset(xas);
736 xas_lock_irq(xas);
737 if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
738 void *old;
739
740 dax_disassociate_entry(entry, mapping, false);
741 dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
742
743
744
745
746
747
748
749
750 old = dax_lock_entry(xas, new_entry);
751 WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) |
752 DAX_LOCKED));
753 entry = new_entry;
754 } else {
755 xas_load(xas);
756 }
757
758 if (dirty)
759 xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
760
761 xas_unlock_irq(xas);
762 return entry;
763 }
764
765 static inline
766 unsigned long pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
767 {
768 unsigned long address;
769
770 address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
771 VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
772 return address;
773 }
774
775 /* Walk every mapping of the given file index and write-protect it. */
776 static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
777 unsigned long pfn)
778 {
779 struct vm_area_struct *vma;
780 pte_t pte, *ptep = NULL;
781 pmd_t *pmdp = NULL;
782 spinlock_t *ptl;
783
784 i_mmap_lock_read(mapping);
785 vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
786 struct mmu_notifier_range range;
787 unsigned long address;
788
789 cond_resched();
790
791 if (!(vma->vm_flags & VM_SHARED))
792 continue;
793
794 address = pgoff_address(index, vma);
795
796
797
798
799
800
801 if (follow_pte_pmd(vma->vm_mm, address, &range,
802 &ptep, &pmdp, &ptl))
803 continue;
804
805
806
807
808
809
810
811
812 if (pmdp) {
813 #ifdef CONFIG_FS_DAX_PMD
814 pmd_t pmd;
815
816 if (pfn != pmd_pfn(*pmdp))
817 goto unlock_pmd;
818 if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
819 goto unlock_pmd;
820
821 flush_cache_page(vma, address, pfn);
822 pmd = pmdp_invalidate(vma, address, pmdp);
823 pmd = pmd_wrprotect(pmd);
824 pmd = pmd_mkclean(pmd);
825 set_pmd_at(vma->vm_mm, address, pmdp, pmd);
826 unlock_pmd:
827 #endif
828 spin_unlock(ptl);
829 } else {
830 if (pfn != pte_pfn(*ptep))
831 goto unlock_pte;
832 if (!pte_dirty(*ptep) && !pte_write(*ptep))
833 goto unlock_pte;
834
835 flush_cache_page(vma, address, pfn);
836 pte = ptep_clear_flush(vma, address, ptep);
837 pte = pte_wrprotect(pte);
838 pte = pte_mkclean(pte);
839 set_pte_at(vma->vm_mm, address, ptep, pte);
840 unlock_pte:
841 pte_unmap_unlock(ptep, ptl);
842 }
843
844 mmu_notifier_invalidate_range_end(&range);
845 }
846 i_mmap_unlock_read(mapping);
847 }
848
849 static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
850 struct address_space *mapping, void *entry)
851 {
852 unsigned long pfn, index, count;
853 long ret = 0;
854
855
856
857
858
859 if (WARN_ON(!xa_is_value(entry)))
860 return -EIO;
861
862 if (unlikely(dax_is_locked(entry))) {
863 void *old_entry = entry;
864
865 entry = get_unlocked_entry(xas, 0);
866
867
868 if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
869 goto put_unlocked;
870
871
872
873
874
875 if (dax_to_pfn(old_entry) != dax_to_pfn(entry))
876 goto put_unlocked;
877 if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
878 dax_is_zero_entry(entry))) {
879 ret = -EIO;
880 goto put_unlocked;
881 }
882
883
884 if (!xas_get_mark(xas, PAGECACHE_TAG_TOWRITE))
885 goto put_unlocked;
886 }
887
888
889 dax_lock_entry(xas, entry);
890
891
892
893
894
895
896
897
898 xas_clear_mark(xas, PAGECACHE_TAG_TOWRITE);
899 xas_unlock_irq(xas);
900
901
902
903
904
905
906
907
908 pfn = dax_to_pfn(entry);
909 count = 1UL << dax_entry_order(entry);
910 index = xas->xa_index & ~(count - 1);
911
912 dax_entry_mkclean(mapping, index, pfn);
913 dax_flush(dax_dev, page_address(pfn_to_page(pfn)), count * PAGE_SIZE);
914
915
916
917
918
919
920 xas_reset(xas);
921 xas_lock_irq(xas);
922 xas_store(xas, entry);
923 xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
924 dax_wake_entry(xas, entry, false);
925
926 trace_dax_writeback_one(mapping->host, index, count);
927 return ret;
928
929 put_unlocked:
930 put_unlocked_entry(xas, entry);
931 return ret;
932 }
933
934
935 /*
936  * Flush dirty DAX entries in the writeback range to the persistent domain;
937  * data integrity operations rely on this before they complete.
938  */
939 int dax_writeback_mapping_range(struct address_space *mapping,
940 struct block_device *bdev, struct writeback_control *wbc)
941 {
942 XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT);
943 struct inode *inode = mapping->host;
944 pgoff_t end_index = wbc->range_end >> PAGE_SHIFT;
945 struct dax_device *dax_dev;
946 void *entry;
947 int ret = 0;
948 unsigned int scanned = 0;
949
950 if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
951 return -EIO;
952
953 if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
954 return 0;
955
956 dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
957 if (!dax_dev)
958 return -EIO;
959
960 trace_dax_writeback_range(inode, xas.xa_index, end_index);
961
962 tag_pages_for_writeback(mapping, xas.xa_index, end_index);
963
964 xas_lock_irq(&xas);
965 xas_for_each_marked(&xas, entry, end_index, PAGECACHE_TAG_TOWRITE) {
966 ret = dax_writeback_one(&xas, dax_dev, mapping, entry);
967 if (ret < 0) {
968 mapping_set_error(mapping, ret);
969 break;
970 }
971 if (++scanned % XA_CHECK_SCHED)
972 continue;
973
974 xas_pause(&xas);
975 xas_unlock_irq(&xas);
976 cond_resched();
977 xas_lock_irq(&xas);
978 }
979 xas_unlock_irq(&xas);
980 put_dax(dax_dev);
981 trace_dax_writeback_range_done(inode, xas.xa_index, end_index);
982 return ret;
983 }
984 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
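/*
 * Hedged usage sketch (names are illustrative): a filesystem's
 * ->writepages method for DAX inodes typically reduces to a call to
 * dax_writeback_mapping_range() with the inode's block device.
 */
static int example_dax_writepages(struct address_space *mapping,
		struct writeback_control *wbc)
{
	return dax_writeback_mapping_range(mapping,
			mapping->host->i_sb->s_bdev, wbc);
}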
985
986 static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
987 {
988 return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
989 }
990
991 static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
992 pfn_t *pfnp)
993 {
994 const sector_t sector = dax_iomap_sector(iomap, pos);
995 pgoff_t pgoff;
996 int id, rc;
997 long length;
998
999 rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
1000 if (rc)
1001 return rc;
1002 id = dax_read_lock();
1003 length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
1004 NULL, pfnp);
1005 if (length < 0) {
1006 rc = length;
1007 goto out;
1008 }
1009 rc = -EINVAL;
1010 if (PFN_PHYS(length) < size)
1011 goto out;
1012 if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
1013 goto out;
1014
1015 if (length > 1 && !pfn_t_devmap(*pfnp))
1016 goto out;
1017 rc = 0;
1018 out:
1019 dax_read_unlock(id);
1020 return rc;
1021 }
1022
1023
1024 /*
1025  * The user performed a load from a hole in the file.  Allocating a block
1026  * here would waste storage for sparse files, so instead a read-only
1027  * mapping of the shared zero page is inserted.  A later write will
1028  * re-fault and replace it with real DAX storage.
1029  */
1030 static vm_fault_t dax_load_hole(struct xa_state *xas,
1031 struct address_space *mapping, void **entry,
1032 struct vm_fault *vmf)
1033 {
1034 struct inode *inode = mapping->host;
1035 unsigned long vaddr = vmf->address;
1036 pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
1037 vm_fault_t ret;
1038
1039 *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
1040 DAX_ZERO_PAGE, false);
1041
1042 ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
1043 trace_dax_load_hole(inode, vmf, ret);
1044 return ret;
1045 }
1046
1047 static bool dax_range_is_aligned(struct block_device *bdev,
1048 unsigned int offset, unsigned int length)
1049 {
1050 unsigned short sector_size = bdev_logical_block_size(bdev);
1051
1052 if (!IS_ALIGNED(offset, sector_size))
1053 return false;
1054 if (!IS_ALIGNED(length, sector_size))
1055 return false;
1056
1057 return true;
1058 }
1059
1060 int __dax_zero_page_range(struct block_device *bdev,
1061 struct dax_device *dax_dev, sector_t sector,
1062 unsigned int offset, unsigned int size)
1063 {
1064 if (dax_range_is_aligned(bdev, offset, size)) {
1065 sector_t start_sector = sector + (offset >> 9);
1066
1067 return blkdev_issue_zeroout(bdev, start_sector,
1068 size >> 9, GFP_NOFS, 0);
1069 } else {
1070 pgoff_t pgoff;
1071 long rc, id;
1072 void *kaddr;
1073
1074 rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
1075 if (rc)
1076 return rc;
1077
1078 id = dax_read_lock();
1079 rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
1080 if (rc < 0) {
1081 dax_read_unlock(id);
1082 return rc;
1083 }
1084 memset(kaddr + offset, 0, size);
1085 dax_flush(dax_dev, kaddr + offset, size);
1086 dax_read_unlock(id);
1087 }
1088 return 0;
1089 }
1090 EXPORT_SYMBOL_GPL(__dax_zero_page_range);
1091
1092 static loff_t
1093 dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
1094 struct iomap *iomap)
1095 {
1096 struct block_device *bdev = iomap->bdev;
1097 struct dax_device *dax_dev = iomap->dax_dev;
1098 struct iov_iter *iter = data;
1099 loff_t end = pos + length, done = 0;
1100 ssize_t ret = 0;
1101 size_t xfer;
1102 int id;
1103
1104 if (iov_iter_rw(iter) == READ) {
1105 end = min(end, i_size_read(inode));
1106 if (pos >= end)
1107 return 0;
1108
1109 if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
1110 return iov_iter_zero(min(length, end - pos), iter);
1111 }
1112
1113 if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
1114 return -EIO;
1115
1116
1117
1118
1119
1120
1121 if (iomap->flags & IOMAP_F_NEW) {
1122 invalidate_inode_pages2_range(inode->i_mapping,
1123 pos >> PAGE_SHIFT,
1124 (end - 1) >> PAGE_SHIFT);
1125 }
1126
1127 id = dax_read_lock();
1128 while (pos < end) {
1129 unsigned offset = pos & (PAGE_SIZE - 1);
1130 const size_t size = ALIGN(length + offset, PAGE_SIZE);
1131 const sector_t sector = dax_iomap_sector(iomap, pos);
1132 ssize_t map_len;
1133 pgoff_t pgoff;
1134 void *kaddr;
1135
1136 if (fatal_signal_pending(current)) {
1137 ret = -EINTR;
1138 break;
1139 }
1140
1141 ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
1142 if (ret)
1143 break;
1144
1145 map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
1146 &kaddr, NULL);
1147 if (map_len < 0) {
1148 ret = map_len;
1149 break;
1150 }
1151
1152 map_len = PFN_PHYS(map_len);
1153 kaddr += offset;
1154 map_len -= offset;
1155 if (map_len > end - pos)
1156 map_len = end - pos;
1157
1158
1159
1160
1161
1162
1163 if (iov_iter_rw(iter) == WRITE)
1164 xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
1165 map_len, iter);
1166 else
1167 xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
1168 map_len, iter);
1169
1170 pos += xfer;
1171 length -= xfer;
1172 done += xfer;
1173
1174 if (xfer == 0)
1175 ret = -EFAULT;
1176 if (xfer < map_len)
1177 break;
1178 }
1179 dax_read_unlock(id);
1180
1181 return done ? done : ret;
1182 }
1183
1184
1185 /**
1186  * dax_iomap_rw - Perform I/O to a DAX file
1187  * @iocb: The control block for this I/O
1188  * @iter: The addresses to do I/O from or to
1189  * @ops: iomap ops passed from the file system
1190  *
1191  * Performs reads and writes directly against persistent memory.  Callers
1192  * handle read/write exclusion and evict overlapping page cache pages.
1193  */
1194 ssize_t
1195 dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
1196 const struct iomap_ops *ops)
1197 {
1198 struct address_space *mapping = iocb->ki_filp->f_mapping;
1199 struct inode *inode = mapping->host;
1200 loff_t pos = iocb->ki_pos, ret = 0, done = 0;
1201 unsigned flags = 0;
1202
1203 if (iov_iter_rw(iter) == WRITE) {
1204 lockdep_assert_held_write(&inode->i_rwsem);
1205 flags |= IOMAP_WRITE;
1206 } else {
1207 lockdep_assert_held(&inode->i_rwsem);
1208 }
1209
1210 if (iocb->ki_flags & IOCB_NOWAIT)
1211 flags |= IOMAP_NOWAIT;
1212
1213 while (iov_iter_count(iter)) {
1214 ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
1215 iter, dax_iomap_actor);
1216 if (ret <= 0)
1217 break;
1218 pos += ret;
1219 done += ret;
1220 }
1221
1222 iocb->ki_pos += done;
1223 return done ? done : ret;
1224 }
1225 EXPORT_SYMBOL_GPL(dax_iomap_rw);
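/*
 * Hedged usage sketch (modelled on the ext2/XFS DAX read path;
 * example_iomap_ops stands in for the filesystem's own iomap_ops):
 * callers take the inode lock shared for reads, exclusive for writes,
 * and hand the iterator to dax_iomap_rw().
 */
extern const struct iomap_ops example_iomap_ops;

static ssize_t example_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	ssize_t ret;

	if (!iov_iter_count(to))
		return 0;	/* skip atime update for zero-length reads */

	inode_lock_shared(inode);
	ret = dax_iomap_rw(iocb, to, &example_iomap_ops);
	inode_unlock_shared(inode);

	file_accessed(iocb->ki_filp);
	return ret;
}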
1226
1227 static vm_fault_t dax_fault_return(int error)
1228 {
1229 if (error == 0)
1230 return VM_FAULT_NOPAGE;
1231 return vmf_error(error);
1232 }
1233
1234 /*
1235  * A fault is synchronous when it is a write fault on a VM_SYNC (MAP_SYNC)
1236  * mapping and the iomap reports dirty metadata (IOMAP_F_DIRTY).
1237  */
1238 static bool dax_fault_is_synchronous(unsigned long flags,
1239 struct vm_area_struct *vma, struct iomap *iomap)
1240 {
1241 return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
1242 && (iomap->flags & IOMAP_F_DIRTY);
1243 }
1244
1245 static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1246 int *iomap_errp, const struct iomap_ops *ops)
1247 {
1248 struct vm_area_struct *vma = vmf->vma;
1249 struct address_space *mapping = vma->vm_file->f_mapping;
1250 XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
1251 struct inode *inode = mapping->host;
1252 unsigned long vaddr = vmf->address;
1253 loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
1254 struct iomap iomap = { 0 };
1255 unsigned flags = IOMAP_FAULT;
1256 int error, major = 0;
1257 bool write = vmf->flags & FAULT_FLAG_WRITE;
1258 bool sync;
1259 vm_fault_t ret = 0;
1260 void *entry;
1261 pfn_t pfn;
1262
1263 trace_dax_pte_fault(inode, vmf, ret);
1264
1265
1266
1267
1268
1269 if (pos >= i_size_read(inode)) {
1270 ret = VM_FAULT_SIGBUS;
1271 goto out;
1272 }
1273
1274 if (write && !vmf->cow_page)
1275 flags |= IOMAP_WRITE;
1276
1277 entry = grab_mapping_entry(&xas, mapping, 0);
1278 if (xa_is_internal(entry)) {
1279 ret = xa_to_internal(entry);
1280 goto out;
1281 }
1282
1283
1284
1285
1286
1287
1288
1289 if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
1290 ret = VM_FAULT_NOPAGE;
1291 goto unlock_entry;
1292 }
1293
1294
1295
1296
1297
1298
1299 error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
1300 if (iomap_errp)
1301 *iomap_errp = error;
1302 if (error) {
1303 ret = dax_fault_return(error);
1304 goto unlock_entry;
1305 }
1306 if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
1307 error = -EIO;
1308 goto error_finish_iomap;
1309 }
1310
1311 if (vmf->cow_page) {
1312 sector_t sector = dax_iomap_sector(&iomap, pos);
1313
1314 switch (iomap.type) {
1315 case IOMAP_HOLE:
1316 case IOMAP_UNWRITTEN:
1317 clear_user_highpage(vmf->cow_page, vaddr);
1318 break;
1319 case IOMAP_MAPPED:
1320 error = copy_user_dax(iomap.bdev, iomap.dax_dev,
1321 sector, PAGE_SIZE, vmf->cow_page, vaddr);
1322 break;
1323 default:
1324 WARN_ON_ONCE(1);
1325 error = -EIO;
1326 break;
1327 }
1328
1329 if (error)
1330 goto error_finish_iomap;
1331
1332 __SetPageUptodate(vmf->cow_page);
1333 ret = finish_fault(vmf);
1334 if (!ret)
1335 ret = VM_FAULT_DONE_COW;
1336 goto finish_iomap;
1337 }
1338
1339 sync = dax_fault_is_synchronous(flags, vma, &iomap);
1340
1341 switch (iomap.type) {
1342 case IOMAP_MAPPED:
1343 if (iomap.flags & IOMAP_F_NEW) {
1344 count_vm_event(PGMAJFAULT);
1345 count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
1346 major = VM_FAULT_MAJOR;
1347 }
1348 error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn);
1349 if (error < 0)
1350 goto error_finish_iomap;
1351
1352 entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
1353 0, write && !sync);
1354
1355
1356
1357
1358
1359
1360
1361 if (sync) {
1362 if (WARN_ON_ONCE(!pfnp)) {
1363 error = -EIO;
1364 goto error_finish_iomap;
1365 }
1366 *pfnp = pfn;
1367 ret = VM_FAULT_NEEDDSYNC | major;
1368 goto finish_iomap;
1369 }
1370 trace_dax_insert_mapping(inode, vmf, entry);
1371 if (write)
1372 ret = vmf_insert_mixed_mkwrite(vma, vaddr, pfn);
1373 else
1374 ret = vmf_insert_mixed(vma, vaddr, pfn);
1375
1376 goto finish_iomap;
1377 case IOMAP_UNWRITTEN:
1378 case IOMAP_HOLE:
1379 if (!write) {
1380 ret = dax_load_hole(&xas, mapping, &entry, vmf);
1381 goto finish_iomap;
1382 }
1383
1384 default:
1385 WARN_ON_ONCE(1);
1386 error = -EIO;
1387 break;
1388 }
1389
1390 error_finish_iomap:
1391 ret = dax_fault_return(error);
1392 finish_iomap:
1393 if (ops->iomap_end) {
1394 int copied = PAGE_SIZE;
1395
1396 if (ret & VM_FAULT_ERROR)
1397 copied = 0;
1398
1399
1400
1401
1402
1403
1404 ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
1405 }
1406 unlock_entry:
1407 dax_unlock_entry(&xas, entry);
1408 out:
1409 trace_dax_pte_fault_done(inode, vmf, ret);
1410 return ret | major;
1411 }
1412
1413 #ifdef CONFIG_FS_DAX_PMD
1414 static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
1415 struct iomap *iomap, void **entry)
1416 {
1417 struct address_space *mapping = vmf->vma->vm_file->f_mapping;
1418 unsigned long pmd_addr = vmf->address & PMD_MASK;
1419 struct vm_area_struct *vma = vmf->vma;
1420 struct inode *inode = mapping->host;
1421 pgtable_t pgtable = NULL;
1422 struct page *zero_page;
1423 spinlock_t *ptl;
1424 pmd_t pmd_entry;
1425 pfn_t pfn;
1426
1427 zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
1428
1429 if (unlikely(!zero_page))
1430 goto fallback;
1431
1432 pfn = page_to_pfn_t(zero_page);
1433 *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
1434 DAX_PMD | DAX_ZERO_PAGE, false);
1435
1436 if (arch_needs_pgtable_deposit()) {
1437 pgtable = pte_alloc_one(vma->vm_mm);
1438 if (!pgtable)
1439 return VM_FAULT_OOM;
1440 }
1441
1442 ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
1443 if (!pmd_none(*(vmf->pmd))) {
1444 spin_unlock(ptl);
1445 goto fallback;
1446 }
1447
1448 if (pgtable) {
1449 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
1450 mm_inc_nr_ptes(vma->vm_mm);
1451 }
1452 pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
1453 pmd_entry = pmd_mkhuge(pmd_entry);
1454 set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
1455 spin_unlock(ptl);
1456 trace_dax_pmd_load_hole(inode, vmf, zero_page, *entry);
1457 return VM_FAULT_NOPAGE;
1458
1459 fallback:
1460 if (pgtable)
1461 pte_free(vma->vm_mm, pgtable);
1462 trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
1463 return VM_FAULT_FALLBACK;
1464 }
1465
1466 static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
1467 const struct iomap_ops *ops)
1468 {
1469 struct vm_area_struct *vma = vmf->vma;
1470 struct address_space *mapping = vma->vm_file->f_mapping;
1471 XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER);
1472 unsigned long pmd_addr = vmf->address & PMD_MASK;
1473 bool write = vmf->flags & FAULT_FLAG_WRITE;
1474 bool sync;
1475 unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
1476 struct inode *inode = mapping->host;
1477 vm_fault_t result = VM_FAULT_FALLBACK;
1478 struct iomap iomap = { 0 };
1479 pgoff_t max_pgoff;
1480 void *entry;
1481 loff_t pos;
1482 int error;
1483 pfn_t pfn;
1484
1485
1486
1487
1488
1489
1490 max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
1491
1492 trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
1493
1494
1495
1496
1497
1498
1499
1500 if ((vmf->pgoff & PG_PMD_COLOUR) !=
1501 ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
1502 goto fallback;
1503
1504
1505 if (write && !(vma->vm_flags & VM_SHARED))
1506 goto fallback;
1507
1508
1509 if (pmd_addr < vma->vm_start)
1510 goto fallback;
1511 if ((pmd_addr + PMD_SIZE) > vma->vm_end)
1512 goto fallback;
1513
1514 if (xas.xa_index >= max_pgoff) {
1515 result = VM_FAULT_SIGBUS;
1516 goto out;
1517 }
1518
1519
1520 if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff)
1521 goto fallback;
1522
1523
1524
1525
1526
1527
1528
1529 entry = grab_mapping_entry(&xas, mapping, PMD_ORDER);
1530 if (xa_is_internal(entry)) {
1531 result = xa_to_internal(entry);
1532 goto fallback;
1533 }
1534
1535
1536
1537
1538
1539
1540
1541 if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
1542 !pmd_devmap(*vmf->pmd)) {
1543 result = 0;
1544 goto unlock_entry;
1545 }
1546
1547
1548
1549
1550
1551
1552 pos = (loff_t)xas.xa_index << PAGE_SHIFT;
1553 error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
1554 if (error)
1555 goto unlock_entry;
1556
1557 if (iomap.offset + iomap.length < pos + PMD_SIZE)
1558 goto finish_iomap;
1559
1560 sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap);
1561
1562 switch (iomap.type) {
1563 case IOMAP_MAPPED:
1564 error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn);
1565 if (error < 0)
1566 goto finish_iomap;
1567
1568 entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
1569 DAX_PMD, write && !sync);
1570
1571
1572
1573
1574
1575
1576
1577 if (sync) {
1578 if (WARN_ON_ONCE(!pfnp))
1579 goto finish_iomap;
1580 *pfnp = pfn;
1581 result = VM_FAULT_NEEDDSYNC;
1582 goto finish_iomap;
1583 }
1584
1585 trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
1586 result = vmf_insert_pfn_pmd(vmf, pfn, write);
1587 break;
1588 case IOMAP_UNWRITTEN:
1589 case IOMAP_HOLE:
1590 if (WARN_ON_ONCE(write))
1591 break;
1592 result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry);
1593 break;
1594 default:
1595 WARN_ON_ONCE(1);
1596 break;
1597 }
1598
1599 finish_iomap:
1600 if (ops->iomap_end) {
1601 int copied = PMD_SIZE;
1602
1603 if (result == VM_FAULT_FALLBACK)
1604 copied = 0;
1605
1606
1607
1608
1609
1610
1611 ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
1612 &iomap);
1613 }
1614 unlock_entry:
1615 dax_unlock_entry(&xas, entry);
1616 fallback:
1617 if (result == VM_FAULT_FALLBACK) {
1618 split_huge_pmd(vma, vmf->pmd, vmf->address);
1619 count_vm_event(THP_FAULT_FALLBACK);
1620 }
1621 out:
1622 trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
1623 return result;
1624 }
1625 #else
1626 static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
1627 const struct iomap_ops *ops)
1628 {
1629 return VM_FAULT_FALLBACK;
1630 }
1631 #endif
1632
1633
1634 /**
1635  * dax_iomap_fault - handle a page fault on a DAX file
1636  * @vmf: The description of the fault
1637  * @pe_size: Size of the page to fault in
1638  * @pfnp: PFN to insert for synchronous faults when fsync is still required
1639  * @iomap_errp: Storage for the detailed error code in case of error
1640  * @ops: Iomap ops passed from the file system
1641  *
1642  * Filesystems call this helper from their fault handlers for DAX files.
1643  * The caller is assumed to have done all the locking needed for the page
1644  * fault to proceed.
1645  */
1646 vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
1647 pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops)
1648 {
1649 switch (pe_size) {
1650 case PE_SIZE_PTE:
1651 return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops);
1652 case PE_SIZE_PMD:
1653 return dax_iomap_pmd_fault(vmf, pfnp, ops);
1654 default:
1655 return VM_FAULT_FALLBACK;
1656 }
1657 }
1658 EXPORT_SYMBOL_GPL(dax_iomap_fault);
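/*
 * Hedged usage sketch (modelled on the ext4/XFS fault handlers; names are
 * illustrative and locking is simplified): a filesystem's ->huge_fault
 * calls dax_iomap_fault() and, when a MAP_SYNC write fault returns
 * VM_FAULT_NEEDDSYNC, follows up with dax_finish_sync_fault().
 */
static vm_fault_t example_dax_huge_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size)
{
	struct super_block *sb = file_inode(vmf->vma->vm_file)->i_sb;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	vm_fault_t ret;
	pfn_t pfn;

	if (write) {
		sb_start_pagefault(sb);
		file_update_time(vmf->vma->vm_file);
	}

	ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &example_iomap_ops);
	if (write) {
		if (ret & VM_FAULT_NEEDDSYNC)
			ret = dax_finish_sync_fault(vmf, pe_size, pfn);
		sb_end_pagefault(sb);
	}

	return ret;
}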
1659
1660 /*
1661  * dax_insert_pfn_mkwrite - insert a PTE or PMD entry into the page tables
1662  * @vmf: The description of the fault
1663  * @pfn: PFN to insert
1664  * @order: Order of the entry to insert
1665  *
1666  * Inserts a writeable PTE or PMD entry into the page tables for an mmapped
1667  * DAX file and marks the corresponding page cache entry dirty.
1668  */
1669 static vm_fault_t
1670 dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
1671 {
1672 struct address_space *mapping = vmf->vma->vm_file->f_mapping;
1673 XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
1674 void *entry;
1675 vm_fault_t ret;
1676
1677 xas_lock_irq(&xas);
1678 entry = get_unlocked_entry(&xas, order);
1679
1680 if (!entry || dax_is_conflict(entry) ||
1681 (order == 0 && !dax_is_pte_entry(entry))) {
1682 put_unlocked_entry(&xas, entry);
1683 xas_unlock_irq(&xas);
1684 trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
1685 VM_FAULT_NOPAGE);
1686 return VM_FAULT_NOPAGE;
1687 }
1688 xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
1689 dax_lock_entry(&xas, entry);
1690 xas_unlock_irq(&xas);
1691 if (order == 0)
1692 ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
1693 #ifdef CONFIG_FS_DAX_PMD
1694 else if (order == PMD_ORDER)
1695 ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
1696 #endif
1697 else
1698 ret = VM_FAULT_FALLBACK;
1699 dax_unlock_entry(&xas, entry);
1700 trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
1701 return ret;
1702 }
1703
1704 /**
1705  * dax_finish_sync_fault - finish a synchronous page fault
1706  * @vmf: The description of the fault
1707  * @pe_size: Size of the entry to be inserted
1708  * @pfn: PFN to insert
1709  *
1710  * Ensures that the file range touched by the page fault has been made
1711  * persistent on the media (via vfs_fsync_range()) and then inserts the
1712  * appropriate writeable page table entry.
1713  */
1714 vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
1715 enum page_entry_size pe_size, pfn_t pfn)
1716 {
1717 int err;
1718 loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
1719 unsigned int order = pe_order(pe_size);
1720 size_t len = PAGE_SIZE << order;
1721
1722 err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
1723 if (err)
1724 return VM_FAULT_SIGBUS;
1725 return dax_insert_pfn_mkwrite(vmf, pfn, order);
1726 }
1727 EXPORT_SYMBOL_GPL(dax_finish_sync_fault);