This source file includes the following definitions:
- page_cache_delete
- unaccount_page_cache_page
- __delete_from_page_cache
- page_cache_free_page
- delete_from_page_cache
- page_cache_delete_batch
- delete_from_page_cache_batch
- filemap_check_errors
- filemap_check_and_keep_errors
- __filemap_fdatawrite_range
- __filemap_fdatawrite
- filemap_fdatawrite
- filemap_fdatawrite_range
- filemap_flush
- filemap_range_has_page
- __filemap_fdatawait_range
- filemap_fdatawait_range
- filemap_fdatawait_range_keep_errors
- file_fdatawait_range
- filemap_fdatawait_keep_errors
- mapping_needs_writeback
- filemap_write_and_wait
- filemap_write_and_wait_range
- __filemap_set_wb_err
- file_check_and_advance_wb_err
- file_write_and_wait_range
- replace_page_cache_page
- __add_to_page_cache_locked
- add_to_page_cache_locked
- add_to_page_cache_lru
- __page_cache_alloc
- page_waitqueue
- pagecache_init
- wake_page_function
- wake_up_page_bit
- wake_up_page
- wait_on_page_bit_common
- wait_on_page_bit
- wait_on_page_bit_killable
- put_and_wait_on_page_locked
- add_page_wait_queue
- clear_bit_unlock_is_negative_byte
- unlock_page
- end_page_writeback
- page_endio
- __lock_page
- __lock_page_killable
- __lock_page_or_retry
- page_cache_next_miss
- page_cache_prev_miss
- find_get_entry
- find_lock_entry
- pagecache_get_page
- find_get_entries
- find_get_pages_range
- find_get_pages_contig
- find_get_pages_range_tag
- shrink_readahead_size_eio
- generic_file_buffered_read
- generic_file_read_iter
- lock_page_maybe_drop_mmap
- do_sync_mmap_readahead
- do_async_mmap_readahead
- filemap_fault
- filemap_map_pages
- filemap_page_mkwrite
- generic_file_mmap
- generic_file_readonly_mmap
- filemap_page_mkwrite
- generic_file_mmap
- generic_file_readonly_mmap
- wait_on_page_read
- do_read_cache_page
- read_cache_page
- read_cache_page_gfp
- generic_write_check_limits
- generic_write_checks
- generic_remap_checks
- generic_file_rw_checks
- generic_copy_file_checks
- pagecache_write_begin
- pagecache_write_end
- generic_file_direct_write
- grab_cache_page_write_begin
- generic_perform_write
- __generic_file_write_iter
- generic_file_write_iter
- try_to_release_page
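
/*
 * Generic page cache handling: lookup, insertion and removal of pages,
 * writeback initiation and waiting, the buffered read path, the mmap
 * fault path, and the generic checks used by most filesystems' write
 * and copy/remap paths.
 */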
13 #include <linux/export.h>
14 #include <linux/compiler.h>
15 #include <linux/dax.h>
16 #include <linux/fs.h>
17 #include <linux/sched/signal.h>
18 #include <linux/uaccess.h>
19 #include <linux/capability.h>
20 #include <linux/kernel_stat.h>
21 #include <linux/gfp.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/mman.h>
25 #include <linux/pagemap.h>
26 #include <linux/file.h>
27 #include <linux/uio.h>
28 #include <linux/error-injection.h>
29 #include <linux/hash.h>
30 #include <linux/writeback.h>
31 #include <linux/backing-dev.h>
32 #include <linux/pagevec.h>
33 #include <linux/blkdev.h>
34 #include <linux/security.h>
35 #include <linux/cpuset.h>
36 #include <linux/hugetlb.h>
37 #include <linux/memcontrol.h>
38 #include <linux/cleancache.h>
39 #include <linux/shmem_fs.h>
40 #include <linux/rmap.h>
41 #include <linux/delayacct.h>
42 #include <linux/psi.h>
43 #include <linux/ramfs.h>
44 #include "internal.h"
45
46 #define CREATE_TRACE_POINTS
47 #include <trace/events/filemap.h>
48
49
50
51
52 #include <linux/buffer_head.h>
53
54 #include <asm/mman.h>
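
/*
 * Remove @page from the mapping's XArray, storing @shadow (or NULL) in
 * its place, and update the nrpages/nrexceptional counts. The caller
 * holds the i_pages lock and the page lock (the page lock is asserted
 * by the VM_BUG_ONs below).
 */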
119 static void page_cache_delete(struct address_space *mapping,
120 struct page *page, void *shadow)
121 {
122 XA_STATE(xas, &mapping->i_pages, page->index);
123 unsigned int nr = 1;
124
125 mapping_set_update(&xas, mapping);
126
127
128 if (!PageHuge(page)) {
129 xas_set_order(&xas, page->index, compound_order(page));
130 nr = compound_nr(page);
131 }
132
133 VM_BUG_ON_PAGE(!PageLocked(page), page);
134 VM_BUG_ON_PAGE(PageTail(page), page);
135 VM_BUG_ON_PAGE(nr != 1 && shadow, page);
136
137 xas_store(&xas, shadow);
138 xas_init_marks(&xas);
139
140 page->mapping = NULL;
141
142
143 if (shadow) {
144 mapping->nrexceptional += nr;
145
146
147
148
149
150
151 smp_wmb();
152 }
153 mapping->nrpages -= nr;
154 }
155
156 static void unaccount_page_cache_page(struct address_space *mapping,
157 struct page *page)
158 {
159 int nr;
160
161
162
163
164
165
166 if (PageUptodate(page) && PageMappedToDisk(page))
167 cleancache_put_page(page);
168 else
169 cleancache_invalidate_page(mapping, page);
170
171 VM_BUG_ON_PAGE(PageTail(page), page);
172 VM_BUG_ON_PAGE(page_mapped(page), page);
173 if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
174 int mapcount;
175
176 pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n",
177 current->comm, page_to_pfn(page));
178 dump_page(page, "still mapped when deleted");
179 dump_stack();
180 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
181
182 mapcount = page_mapcount(page);
183 if (mapping_exiting(mapping) &&
184 page_count(page) >= mapcount + 2) {
185
186
187
188
189
190
191 page_mapcount_reset(page);
192 page_ref_sub(page, mapcount);
193 }
194 }
195
196
197 if (PageHuge(page))
198 return;
199
200 nr = hpage_nr_pages(page);
201
202 __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
203 if (PageSwapBacked(page)) {
204 __mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
205 if (PageTransHuge(page))
206 __dec_node_page_state(page, NR_SHMEM_THPS);
207 } else if (PageTransHuge(page)) {
208 __dec_node_page_state(page, NR_FILE_THPS);
209 filemap_nr_thps_dec(mapping);
210 }
211
212
213
214
215
216
217
218
219
220
221
222 if (WARN_ON_ONCE(PageDirty(page)))
223 account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
224 }
225
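/*
 * Delete @page from the page cache. The caller must hold the i_pages
 * lock and have the page locked; statistics are unaccounted first and
 * the XArray entry is then replaced by @shadow.
 */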
231 void __delete_from_page_cache(struct page *page, void *shadow)
232 {
233 struct address_space *mapping = page->mapping;
234
235 trace_mm_filemap_delete_from_page_cache(page);
236
237 unaccount_page_cache_page(mapping, page);
238 page_cache_delete(mapping, page, shadow);
239 }
240
241 static void page_cache_free_page(struct address_space *mapping,
242 struct page *page)
243 {
244 void (*freepage)(struct page *);
245
246 freepage = mapping->a_ops->freepage;
247 if (freepage)
248 freepage(page);
249
250 if (PageTransHuge(page) && !PageHuge(page)) {
251 page_ref_sub(page, HPAGE_PMD_NR);
252 VM_BUG_ON_PAGE(page_count(page) <= 0, page);
253 } else {
254 put_page(page);
255 }
256 }
257
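/*
 * Remove a locked page from the page cache and drop the cache's
 * reference to it. Takes the i_pages lock itself, so the caller only
 * needs to hold the page lock.
 */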
266 void delete_from_page_cache(struct page *page)
267 {
268 struct address_space *mapping = page_mapping(page);
269 unsigned long flags;
270
271 BUG_ON(!PageLocked(page));
272 xa_lock_irqsave(&mapping->i_pages, flags);
273 __delete_from_page_cache(page, NULL);
274 xa_unlock_irqrestore(&mapping->i_pages, flags);
275
276 page_cache_free_page(mapping, page);
277 }
278 EXPORT_SYMBOL(delete_from_page_cache);
279
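/*
 * Remove the pages in @pvec from the mapping's XArray in a single walk.
 * Called with the i_pages lock held. @pvec is expected to be sorted by
 * index; entries in the tree that are not part of @pvec (including
 * shadow/value entries) are left untouched.
 */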
294 static void page_cache_delete_batch(struct address_space *mapping,
295 struct pagevec *pvec)
296 {
297 XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
298 int total_pages = 0;
299 int i = 0;
300 struct page *page;
301
302 mapping_set_update(&xas, mapping);
303 xas_for_each(&xas, page, ULONG_MAX) {
304 if (i >= pagevec_count(pvec))
305 break;
306
307
308 if (xa_is_value(page))
309 continue;
310
311
312
313
314
315
316
317 if (page != pvec->pages[i]) {
318 VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
319 page);
320 continue;
321 }
322
323 WARN_ON_ONCE(!PageLocked(page));
324
325 if (page->index == xas.xa_index)
326 page->mapping = NULL;
327
328
329
330
331
332
333
334 if (page->index + compound_nr(page) - 1 == xas.xa_index)
335 i++;
336 xas_store(&xas, NULL);
337 total_pages++;
338 }
339 mapping->nrpages -= total_pages;
340 }
341
342 void delete_from_page_cache_batch(struct address_space *mapping,
343 struct pagevec *pvec)
344 {
345 int i;
346 unsigned long flags;
347
348 if (!pagevec_count(pvec))
349 return;
350
351 xa_lock_irqsave(&mapping->i_pages, flags);
352 for (i = 0; i < pagevec_count(pvec); i++) {
353 trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
354
355 unaccount_page_cache_page(mapping, pvec->pages[i]);
356 }
357 page_cache_delete_batch(mapping, pvec);
358 xa_unlock_irqrestore(&mapping->i_pages, flags);
359
360 for (i = 0; i < pagevec_count(pvec); i++)
361 page_cache_free_page(mapping, pvec->pages[i]);
362 }
363
364 int filemap_check_errors(struct address_space *mapping)
365 {
366 int ret = 0;
367
368 if (test_bit(AS_ENOSPC, &mapping->flags) &&
369 test_and_clear_bit(AS_ENOSPC, &mapping->flags))
370 ret = -ENOSPC;
371 if (test_bit(AS_EIO, &mapping->flags) &&
372 test_and_clear_bit(AS_EIO, &mapping->flags))
373 ret = -EIO;
374 return ret;
375 }
376 EXPORT_SYMBOL(filemap_check_errors);
377
378 static int filemap_check_and_keep_errors(struct address_space *mapping)
379 {
380
381 if (test_bit(AS_EIO, &mapping->flags))
382 return -EIO;
383 if (test_bit(AS_ENOSPC, &mapping->flags))
384 return -ENOSPC;
385 return 0;
386 }
387
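/*
 * Kick off writeback of the dirty pages in @mapping within the byte
 * range @start..@end. @sync_mode selects WB_SYNC_ALL (data-integrity
 * writeout) or WB_SYNC_NONE (best effort). This starts I/O but does
 * not wait for it to complete.
 */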
405 int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
406 loff_t end, int sync_mode)
407 {
408 int ret;
409 struct writeback_control wbc = {
410 .sync_mode = sync_mode,
411 .nr_to_write = LONG_MAX,
412 .range_start = start,
413 .range_end = end,
414 };
415
416 if (!mapping_cap_writeback_dirty(mapping) ||
417 !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
418 return 0;
419
420 wbc_attach_fdatawrite_inode(&wbc, mapping->host);
421 ret = do_writepages(mapping, &wbc);
422 wbc_detach_inode(&wbc);
423 return ret;
424 }
425
426 static inline int __filemap_fdatawrite(struct address_space *mapping,
427 int sync_mode)
428 {
429 return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
430 }
431
432 int filemap_fdatawrite(struct address_space *mapping)
433 {
434 return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
435 }
436 EXPORT_SYMBOL(filemap_fdatawrite);
437
438 int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
439 loff_t end)
440 {
441 return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
442 }
443 EXPORT_SYMBOL(filemap_fdatawrite_range);
444
445
446
447
448
449
450
451
452
453
454 int filemap_flush(struct address_space *mapping)
455 {
456 return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
457 }
458 EXPORT_SYMBOL(filemap_flush);
459
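/*
 * Return true if at least one page (as opposed to a shadow/value
 * entry) is present in the page cache for the byte range
 * @start_byte..@end_byte.
 */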
472 bool filemap_range_has_page(struct address_space *mapping,
473 loff_t start_byte, loff_t end_byte)
474 {
475 struct page *page;
476 XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
477 pgoff_t max = end_byte >> PAGE_SHIFT;
478
479 if (end_byte < start_byte)
480 return false;
481
482 rcu_read_lock();
483 for (;;) {
484 page = xas_find(&xas, max);
485 if (xas_retry(&xas, page))
486 continue;
487
488 if (xa_is_value(page))
489 continue;
490
491
492
493
494
495 break;
496 }
497 rcu_read_unlock();
498
499 return page != NULL;
500 }
501 EXPORT_SYMBOL(filemap_range_has_page);
502
503 static void __filemap_fdatawait_range(struct address_space *mapping,
504 loff_t start_byte, loff_t end_byte)
505 {
506 pgoff_t index = start_byte >> PAGE_SHIFT;
507 pgoff_t end = end_byte >> PAGE_SHIFT;
508 struct pagevec pvec;
509 int nr_pages;
510
511 if (end_byte < start_byte)
512 return;
513
514 pagevec_init(&pvec);
515 while (index <= end) {
516 unsigned i;
517
518 nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
519 end, PAGECACHE_TAG_WRITEBACK);
520 if (!nr_pages)
521 break;
522
523 for (i = 0; i < nr_pages; i++) {
524 struct page *page = pvec.pages[i];
525
526 wait_on_page_writeback(page);
527 ClearPageError(page);
528 }
529 pagevec_release(&pvec);
530 cond_resched();
531 }
532 }
533
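/*
 * Wait for writeback to complete on all pages in the given byte range,
 * then check and clear the mapping's error flags (AS_EIO/AS_ENOSPC)
 * and return any error found.
 */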
550 int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
551 loff_t end_byte)
552 {
553 __filemap_fdatawait_range(mapping, start_byte, end_byte);
554 return filemap_check_errors(mapping);
555 }
556 EXPORT_SYMBOL(filemap_fdatawait_range);
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572 int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
573 loff_t start_byte, loff_t end_byte)
574 {
575 __filemap_fdatawait_range(mapping, start_byte, end_byte);
576 return filemap_check_and_keep_errors(mapping);
577 }
578 EXPORT_SYMBOL(filemap_fdatawait_range_keep_errors);
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596 int file_fdatawait_range(struct file *file, loff_t start_byte, loff_t end_byte)
597 {
598 struct address_space *mapping = file->f_mapping;
599
600 __filemap_fdatawait_range(mapping, start_byte, end_byte);
601 return file_check_and_advance_wb_err(file);
602 }
603 EXPORT_SYMBOL(file_fdatawait_range);
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619 int filemap_fdatawait_keep_errors(struct address_space *mapping)
620 {
621 __filemap_fdatawait_range(mapping, 0, LLONG_MAX);
622 return filemap_check_and_keep_errors(mapping);
623 }
624 EXPORT_SYMBOL(filemap_fdatawait_keep_errors);
625
626
627 static bool mapping_needs_writeback(struct address_space *mapping)
628 {
629 if (dax_mapping(mapping))
630 return mapping->nrexceptional;
631
632 return mapping->nrpages;
633 }
634
635 int filemap_write_and_wait(struct address_space *mapping)
636 {
637 int err = 0;
638
639 if (mapping_needs_writeback(mapping)) {
640 err = filemap_fdatawrite(mapping);
641
642
643
644
645
646
647 if (err != -EIO) {
648 int err2 = filemap_fdatawait(mapping);
649 if (!err)
650 err = err2;
651 } else {
652
653 filemap_check_errors(mapping);
654 }
655 } else {
656 err = filemap_check_errors(mapping);
657 }
658 return err;
659 }
660 EXPORT_SYMBOL(filemap_write_and_wait);
661
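/*
 * Write out and wait upon file offsets @lstart to @lend inclusive.
 * On -EIO from the write phase the wait is skipped and the mapping's
 * AS_EIO/AS_ENOSPC flags are cleared, so the error is not reported a
 * second time.
 */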
675 int filemap_write_and_wait_range(struct address_space *mapping,
676 loff_t lstart, loff_t lend)
677 {
678 int err = 0;
679
680 if (mapping_needs_writeback(mapping)) {
681 err = __filemap_fdatawrite_range(mapping, lstart, lend,
682 WB_SYNC_ALL);
683
684 if (err != -EIO) {
685 int err2 = filemap_fdatawait_range(mapping,
686 lstart, lend);
687 if (!err)
688 err = err2;
689 } else {
690
691 filemap_check_errors(mapping);
692 }
693 } else {
694 err = filemap_check_errors(mapping);
695 }
696 return err;
697 }
698 EXPORT_SYMBOL(filemap_write_and_wait_range);
699
700 void __filemap_set_wb_err(struct address_space *mapping, int err)
701 {
702 errseq_t eseq = errseq_set(&mapping->wb_err, err);
703
704 trace_filemap_set_wb_err(mapping, eseq);
705 }
706 EXPORT_SYMBOL(__filemap_set_wb_err);
707
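/*
 * Report and clear any writeback error that occurred on @file's mapping
 * since this file last checked. The per-file errseq_t cursor is
 * advanced under f_lock, and the legacy AS_EIO/AS_ENOSPC bits are
 * cleared so the same error is not also reported through
 * filemap_check_errors().
 */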
732 int file_check_and_advance_wb_err(struct file *file)
733 {
734 int err = 0;
735 errseq_t old = READ_ONCE(file->f_wb_err);
736 struct address_space *mapping = file->f_mapping;
737
738
739 if (errseq_check(&mapping->wb_err, old)) {
740
741 spin_lock(&file->f_lock);
742 old = file->f_wb_err;
743 err = errseq_check_and_advance(&mapping->wb_err,
744 &file->f_wb_err);
745 trace_file_check_and_advance_wb_err(file, old);
746 spin_unlock(&file->f_lock);
747 }
748
749
750
751
752
753
754 clear_bit(AS_EIO, &mapping->flags);
755 clear_bit(AS_ENOSPC, &mapping->flags);
756 return err;
757 }
758 EXPORT_SYMBOL(file_check_and_advance_wb_err);
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776 int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
777 {
778 int err = 0, err2;
779 struct address_space *mapping = file->f_mapping;
780
781 if (mapping_needs_writeback(mapping)) {
782 err = __filemap_fdatawrite_range(mapping, lstart, lend,
783 WB_SYNC_ALL);
784
785 if (err != -EIO)
786 __filemap_fdatawait_range(mapping, lstart, lend);
787 }
788 err2 = file_check_and_advance_wb_err(file);
789 if (!err)
790 err = err2;
791 return err;
792 }
793 EXPORT_SYMBOL(file_write_and_wait_range);
794
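/*
 * Atomically replace @old with @new at the same offset in the page
 * cache, transferring the NR_FILE_PAGES/NR_SHMEM accounting and the
 * memcg charge. Both pages must be locked and @new must not yet have a
 * mapping. Always returns 0.
 */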
811 int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
812 {
813 struct address_space *mapping = old->mapping;
814 void (*freepage)(struct page *) = mapping->a_ops->freepage;
815 pgoff_t offset = old->index;
816 XA_STATE(xas, &mapping->i_pages, offset);
817 unsigned long flags;
818
819 VM_BUG_ON_PAGE(!PageLocked(old), old);
820 VM_BUG_ON_PAGE(!PageLocked(new), new);
821 VM_BUG_ON_PAGE(new->mapping, new);
822
823 get_page(new);
824 new->mapping = mapping;
825 new->index = offset;
826
827 xas_lock_irqsave(&xas, flags);
828 xas_store(&xas, new);
829
830 old->mapping = NULL;
831
832 if (!PageHuge(old))
833 __dec_node_page_state(new, NR_FILE_PAGES);
834 if (!PageHuge(new))
835 __inc_node_page_state(new, NR_FILE_PAGES);
836 if (PageSwapBacked(old))
837 __dec_node_page_state(new, NR_SHMEM);
838 if (PageSwapBacked(new))
839 __inc_node_page_state(new, NR_SHMEM);
840 xas_unlock_irqrestore(&xas, flags);
841 mem_cgroup_migrate(old, new);
842 if (freepage)
843 freepage(old);
844 put_page(old);
845
846 return 0;
847 }
848 EXPORT_SYMBOL_GPL(replace_page_cache_page);
849
850 static int __add_to_page_cache_locked(struct page *page,
851 struct address_space *mapping,
852 pgoff_t offset, gfp_t gfp_mask,
853 void **shadowp)
854 {
855 XA_STATE(xas, &mapping->i_pages, offset);
856 int huge = PageHuge(page);
857 struct mem_cgroup *memcg;
858 int error;
859 void *old;
860
861 VM_BUG_ON_PAGE(!PageLocked(page), page);
862 VM_BUG_ON_PAGE(PageSwapBacked(page), page);
863 mapping_set_update(&xas, mapping);
864
865 if (!huge) {
866 error = mem_cgroup_try_charge(page, current->mm,
867 gfp_mask, &memcg, false);
868 if (error)
869 return error;
870 }
871
872 get_page(page);
873 page->mapping = mapping;
874 page->index = offset;
875
876 do {
877 xas_lock_irq(&xas);
878 old = xas_load(&xas);
879 if (old && !xa_is_value(old))
880 xas_set_err(&xas, -EEXIST);
881 xas_store(&xas, page);
882 if (xas_error(&xas))
883 goto unlock;
884
885 if (xa_is_value(old)) {
886 mapping->nrexceptional--;
887 if (shadowp)
888 *shadowp = old;
889 }
890 mapping->nrpages++;
891
892
893 if (!huge)
894 __inc_node_page_state(page, NR_FILE_PAGES);
895 unlock:
896 xas_unlock_irq(&xas);
897 } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));
898
899 if (xas_error(&xas))
900 goto error;
901
902 if (!huge)
903 mem_cgroup_commit_charge(page, memcg, false, false);
904 trace_mm_filemap_add_to_page_cache(page);
905 return 0;
906 error:
907 page->mapping = NULL;
908
909 if (!huge)
910 mem_cgroup_cancel_charge(page, memcg, false);
911 put_page(page);
912 return xas_error(&xas);
913 }
914 ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
915
916
917
918
919
920
921
922
923
924
925
926
927
928 int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
929 pgoff_t offset, gfp_t gfp_mask)
930 {
931 return __add_to_page_cache_locked(page, mapping, offset,
932 gfp_mask, NULL);
933 }
934 EXPORT_SYMBOL(add_to_page_cache_locked);
935
936 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
937 pgoff_t offset, gfp_t gfp_mask)
938 {
939 void *shadow = NULL;
940 int ret;
941
942 __SetPageLocked(page);
943 ret = __add_to_page_cache_locked(page, mapping, offset,
944 gfp_mask, &shadow);
945 if (unlikely(ret))
946 __ClearPageLocked(page);
947 else {
948
949
950
951
952
953
954
955
956 WARN_ON_ONCE(PageActive(page));
957 if (!(gfp_mask & __GFP_WRITE) && shadow)
958 workingset_refault(page, shadow);
959 lru_cache_add(page);
960 }
961 return ret;
962 }
963 EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
964
965 #ifdef CONFIG_NUMA
966 struct page *__page_cache_alloc(gfp_t gfp)
967 {
968 int n;
969 struct page *page;
970
971 if (cpuset_do_page_mem_spread()) {
972 unsigned int cpuset_mems_cookie;
973 do {
974 cpuset_mems_cookie = read_mems_allowed_begin();
975 n = cpuset_mem_spread_node();
976 page = __alloc_pages_node(n, gfp, 0);
977 } while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
978
979 return page;
980 }
981 return alloc_pages(gfp, 0);
982 }
983 EXPORT_SYMBOL(__page_cache_alloc);
984 #endif
985
986
987
988
989
990
991
992
993
994
995
996 #define PAGE_WAIT_TABLE_BITS 8
997 #define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
998 static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
999
1000 static wait_queue_head_t *page_waitqueue(struct page *page)
1001 {
1002 return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
1003 }
1004
1005 void __init pagecache_init(void)
1006 {
1007 int i;
1008
1009 for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
1010 init_waitqueue_head(&page_wait_table[i]);
1011
1012 page_writeback_init();
1013 }
1014
1015
1016 struct wait_page_key {
1017 struct page *page;
1018 int bit_nr;
1019 int page_match;
1020 };
1021
1022 struct wait_page_queue {
1023 struct page *page;
1024 int bit_nr;
1025 wait_queue_entry_t wait;
1026 };
1027
1028 static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
1029 {
1030 struct wait_page_key *key = arg;
1031 struct wait_page_queue *wait_page
1032 = container_of(wait, struct wait_page_queue, wait);
1033
1034 if (wait_page->page != key->page)
1035 return 0;
1036 key->page_match = 1;
1037
1038 if (wait_page->bit_nr != key->bit_nr)
1039 return 0;
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049 if (test_bit(key->bit_nr, &key->page->flags))
1050 return -1;
1051
1052 return autoremove_wake_function(wait, mode, sync, key);
1053 }
1054
1055 static void wake_up_page_bit(struct page *page, int bit_nr)
1056 {
1057 wait_queue_head_t *q = page_waitqueue(page);
1058 struct wait_page_key key;
1059 unsigned long flags;
1060 wait_queue_entry_t bookmark;
1061
1062 key.page = page;
1063 key.bit_nr = bit_nr;
1064 key.page_match = 0;
1065
1066 bookmark.flags = 0;
1067 bookmark.private = NULL;
1068 bookmark.func = NULL;
1069 INIT_LIST_HEAD(&bookmark.entry);
1070
1071 spin_lock_irqsave(&q->lock, flags);
1072 __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
1073
1074 while (bookmark.flags & WQ_FLAG_BOOKMARK) {
1075
1076
1077
1078
1079
1080
1081 spin_unlock_irqrestore(&q->lock, flags);
1082 cpu_relax();
1083 spin_lock_irqsave(&q->lock, flags);
1084 __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
1085 }
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096 if (!waitqueue_active(q) || !key.page_match) {
1097 ClearPageWaiters(page);
1098
1099
1100
1101
1102
1103
1104
1105 }
1106 spin_unlock_irqrestore(&q->lock, flags);
1107 }
1108
1109 static void wake_up_page(struct page *page, int bit)
1110 {
1111 if (!PageWaiters(page))
1112 return;
1113 wake_up_page_bit(page, bit);
1114 }
1115
1116
1117
1118
1119 enum behavior {
1120 EXCLUSIVE,
1121
1122
1123 SHARED,
1124
1125
1126 DROP,
1127
1128
1129 };
1130
1131 static inline int wait_on_page_bit_common(wait_queue_head_t *q,
1132 struct page *page, int bit_nr, int state, enum behavior behavior)
1133 {
1134 struct wait_page_queue wait_page;
1135 wait_queue_entry_t *wait = &wait_page.wait;
1136 bool bit_is_set;
1137 bool thrashing = false;
1138 bool delayacct = false;
1139 unsigned long pflags;
1140 int ret = 0;
1141
1142 if (bit_nr == PG_locked &&
1143 !PageUptodate(page) && PageWorkingset(page)) {
1144 if (!PageSwapBacked(page)) {
1145 delayacct_thrashing_start();
1146 delayacct = true;
1147 }
1148 psi_memstall_enter(&pflags);
1149 thrashing = true;
1150 }
1151
1152 init_wait(wait);
1153 wait->flags = behavior == EXCLUSIVE ? WQ_FLAG_EXCLUSIVE : 0;
1154 wait->func = wake_page_function;
1155 wait_page.page = page;
1156 wait_page.bit_nr = bit_nr;
1157
1158 for (;;) {
1159 spin_lock_irq(&q->lock);
1160
1161 if (likely(list_empty(&wait->entry))) {
1162 __add_wait_queue_entry_tail(q, wait);
1163 SetPageWaiters(page);
1164 }
1165
1166 set_current_state(state);
1167
1168 spin_unlock_irq(&q->lock);
1169
1170 bit_is_set = test_bit(bit_nr, &page->flags);
1171 if (behavior == DROP)
1172 put_page(page);
1173
1174 if (likely(bit_is_set))
1175 io_schedule();
1176
1177 if (behavior == EXCLUSIVE) {
1178 if (!test_and_set_bit_lock(bit_nr, &page->flags))
1179 break;
1180 } else if (behavior == SHARED) {
1181 if (!test_bit(bit_nr, &page->flags))
1182 break;
1183 }
1184
1185 if (signal_pending_state(state, current)) {
1186 ret = -EINTR;
1187 break;
1188 }
1189
1190 if (behavior == DROP) {
1191
1192
1193
1194
1195
1196
1197
1198 break;
1199 }
1200 }
1201
1202 finish_wait(q, wait);
1203
1204 if (thrashing) {
1205 if (delayacct)
1206 delayacct_thrashing_end();
1207 psi_memstall_leave(&pflags);
1208 }
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218 return ret;
1219 }
1220
1221 void wait_on_page_bit(struct page *page, int bit_nr)
1222 {
1223 wait_queue_head_t *q = page_waitqueue(page);
1224 wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
1225 }
1226 EXPORT_SYMBOL(wait_on_page_bit);
1227
1228 int wait_on_page_bit_killable(struct page *page, int bit_nr)
1229 {
1230 wait_queue_head_t *q = page_waitqueue(page);
1231 return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED);
1232 }
1233 EXPORT_SYMBOL(wait_on_page_bit_killable);
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245 void put_and_wait_on_page_locked(struct page *page)
1246 {
1247 wait_queue_head_t *q;
1248
1249 page = compound_head(page);
1250 q = page_waitqueue(page);
1251 wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, DROP);
1252 }
1253
1254
1255
1256
1257
1258
1259
1260
1261 void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
1262 {
1263 wait_queue_head_t *q = page_waitqueue(page);
1264 unsigned long flags;
1265
1266 spin_lock_irqsave(&q->lock, flags);
1267 __add_wait_queue_entry_tail(q, waiter);
1268 SetPageWaiters(page);
1269 spin_unlock_irqrestore(&q->lock, flags);
1270 }
1271 EXPORT_SYMBOL_GPL(add_page_wait_queue);
1272
1273 #ifndef clear_bit_unlock_is_negative_byte
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287 static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem)
1288 {
1289 clear_bit_unlock(nr, mem);
1290
1291 return test_bit(PG_waiters, mem);
1292 }
1293
1294 #endif
1295
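/*
 * Unlock @page (or its compound head) by clearing PG_locked, waking any
 * waiters if the PG_waiters bit was set. Relies on PG_waiters and
 * PG_locked living in the same byte so that a single negative-byte
 * test covers both (see clear_bit_unlock_is_negative_byte above).
 */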
1311 void unlock_page(struct page *page)
1312 {
1313 BUILD_BUG_ON(PG_waiters != 7);
1314 page = compound_head(page);
1315 VM_BUG_ON_PAGE(!PageLocked(page), page);
1316 if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
1317 wake_up_page_bit(page, PG_locked);
1318 }
1319 EXPORT_SYMBOL(unlock_page);
1320
1321
1322
1323
1324
1325 void end_page_writeback(struct page *page)
1326 {
1327
1328
1329
1330
1331
1332
1333
1334 if (PageReclaim(page)) {
1335 ClearPageReclaim(page);
1336 rotate_reclaimable_page(page);
1337 }
1338
1339 if (!test_clear_page_writeback(page))
1340 BUG();
1341
1342 smp_mb__after_atomic();
1343 wake_up_page(page, PG_writeback);
1344 }
1345 EXPORT_SYMBOL(end_page_writeback);
1346
1347
1348
1349
1350
1351 void page_endio(struct page *page, bool is_write, int err)
1352 {
1353 if (!is_write) {
1354 if (!err) {
1355 SetPageUptodate(page);
1356 } else {
1357 ClearPageUptodate(page);
1358 SetPageError(page);
1359 }
1360 unlock_page(page);
1361 } else {
1362 if (err) {
1363 struct address_space *mapping;
1364
1365 SetPageError(page);
1366 mapping = page_mapping(page);
1367 if (mapping)
1368 mapping_set_error(mapping, err);
1369 }
1370 end_page_writeback(page);
1371 }
1372 }
1373 EXPORT_SYMBOL_GPL(page_endio);
1374
1375
1376
1377
1378
1379 void __lock_page(struct page *__page)
1380 {
1381 struct page *page = compound_head(__page);
1382 wait_queue_head_t *q = page_waitqueue(page);
1383 wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE,
1384 EXCLUSIVE);
1385 }
1386 EXPORT_SYMBOL(__lock_page);
1387
1388 int __lock_page_killable(struct page *__page)
1389 {
1390 struct page *page = compound_head(__page);
1391 wait_queue_head_t *q = page_waitqueue(page);
1392 return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE,
1393 EXCLUSIVE);
1394 }
1395 EXPORT_SYMBOL_GPL(__lock_page_killable);
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
1409 unsigned int flags)
1410 {
1411 if (flags & FAULT_FLAG_ALLOW_RETRY) {
1412
1413
1414
1415
1416 if (flags & FAULT_FLAG_RETRY_NOWAIT)
1417 return 0;
1418
1419 up_read(&mm->mmap_sem);
1420 if (flags & FAULT_FLAG_KILLABLE)
1421 wait_on_page_locked_killable(page);
1422 else
1423 wait_on_page_locked(page);
1424 return 0;
1425 } else {
1426 if (flags & FAULT_FLAG_KILLABLE) {
1427 int ret;
1428
1429 ret = __lock_page_killable(page);
1430 if (ret) {
1431 up_read(&mm->mmap_sem);
1432 return 0;
1433 }
1434 } else
1435 __lock_page(page);
1436 return 1;
1437 }
1438 }
1439
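/*
 * Search the range [@index, @index + @max_scan) for the first gap in
 * the page cache (no page present; shadow entries count as gaps).
 * Returns the index of the gap, an index beyond the range if none was
 * found, or 0 if the search wrapped past the end of the index space.
 */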
1459 pgoff_t page_cache_next_miss(struct address_space *mapping,
1460 pgoff_t index, unsigned long max_scan)
1461 {
1462 XA_STATE(xas, &mapping->i_pages, index);
1463
1464 while (max_scan--) {
1465 void *entry = xas_next(&xas);
1466 if (!entry || xa_is_value(entry))
1467 break;
1468 if (xas.xa_index == 0)
1469 break;
1470 }
1471
1472 return xas.xa_index;
1473 }
1474 EXPORT_SYMBOL(page_cache_next_miss);
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495 pgoff_t page_cache_prev_miss(struct address_space *mapping,
1496 pgoff_t index, unsigned long max_scan)
1497 {
1498 XA_STATE(xas, &mapping->i_pages, index);
1499
1500 while (max_scan--) {
1501 void *entry = xas_prev(&xas);
1502 if (!entry || xa_is_value(entry))
1503 break;
1504 if (xas.xa_index == ULONG_MAX)
1505 break;
1506 }
1507
1508 return xas.xa_index;
1509 }
1510 EXPORT_SYMBOL(page_cache_prev_miss);
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
1526 {
1527 XA_STATE(xas, &mapping->i_pages, offset);
1528 struct page *page;
1529
1530 rcu_read_lock();
1531 repeat:
1532 xas_reset(&xas);
1533 page = xas_load(&xas);
1534 if (xas_retry(&xas, page))
1535 goto repeat;
1536
1537
1538
1539
1540 if (!page || xa_is_value(page))
1541 goto out;
1542
1543 if (!page_cache_get_speculative(page))
1544 goto repeat;
1545
1546
1547
1548
1549
1550
1551 if (unlikely(page != xas_reload(&xas))) {
1552 put_page(page);
1553 goto repeat;
1554 }
1555 page = find_subpage(page, offset);
1556 out:
1557 rcu_read_unlock();
1558
1559 return page;
1560 }
1561 EXPORT_SYMBOL(find_get_entry);
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579 struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
1580 {
1581 struct page *page;
1582
1583 repeat:
1584 page = find_get_entry(mapping, offset);
1585 if (page && !xa_is_value(page)) {
1586 lock_page(page);
1587
1588 if (unlikely(page_mapping(page) != mapping)) {
1589 unlock_page(page);
1590 put_page(page);
1591 goto repeat;
1592 }
1593 VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
1594 }
1595 return page;
1596 }
1597 EXPORT_SYMBOL(find_lock_entry);
1598
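/*
 * Look up the page at @offset in @mapping and apply the FGP_* flags:
 * FGP_LOCK locks the page (FGP_NOWAIT turns that into a trylock),
 * FGP_ACCESSED marks it accessed, and FGP_CREAT allocates and inserts
 * a new page with @gfp_mask if none exists. Returns the page with an
 * elevated refcount, or NULL if it was not found and not created.
 */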
1629 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
1630 int fgp_flags, gfp_t gfp_mask)
1631 {
1632 struct page *page;
1633
1634 repeat:
1635 page = find_get_entry(mapping, offset);
1636 if (xa_is_value(page))
1637 page = NULL;
1638 if (!page)
1639 goto no_page;
1640
1641 if (fgp_flags & FGP_LOCK) {
1642 if (fgp_flags & FGP_NOWAIT) {
1643 if (!trylock_page(page)) {
1644 put_page(page);
1645 return NULL;
1646 }
1647 } else {
1648 lock_page(page);
1649 }
1650
1651
1652 if (unlikely(compound_head(page)->mapping != mapping)) {
1653 unlock_page(page);
1654 put_page(page);
1655 goto repeat;
1656 }
1657 VM_BUG_ON_PAGE(page->index != offset, page);
1658 }
1659
1660 if (fgp_flags & FGP_ACCESSED)
1661 mark_page_accessed(page);
1662
1663 no_page:
1664 if (!page && (fgp_flags & FGP_CREAT)) {
1665 int err;
1666 if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping))
1667 gfp_mask |= __GFP_WRITE;
1668 if (fgp_flags & FGP_NOFS)
1669 gfp_mask &= ~__GFP_FS;
1670
1671 page = __page_cache_alloc(gfp_mask);
1672 if (!page)
1673 return NULL;
1674
1675 if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
1676 fgp_flags |= FGP_LOCK;
1677
1678
1679 if (fgp_flags & FGP_ACCESSED)
1680 __SetPageReferenced(page);
1681
1682 err = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
1683 if (unlikely(err)) {
1684 put_page(page);
1685 page = NULL;
1686 if (err == -EEXIST)
1687 goto repeat;
1688 }
1689
1690
1691
1692
1693
1694 if (page && (fgp_flags & FGP_FOR_MMAP))
1695 unlock_page(page);
1696 }
1697
1698 return page;
1699 }
1700 EXPORT_SYMBOL(pagecache_get_page);
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724 unsigned find_get_entries(struct address_space *mapping,
1725 pgoff_t start, unsigned int nr_entries,
1726 struct page **entries, pgoff_t *indices)
1727 {
1728 XA_STATE(xas, &mapping->i_pages, start);
1729 struct page *page;
1730 unsigned int ret = 0;
1731
1732 if (!nr_entries)
1733 return 0;
1734
1735 rcu_read_lock();
1736 xas_for_each(&xas, page, ULONG_MAX) {
1737 if (xas_retry(&xas, page))
1738 continue;
1739
1740
1741
1742
1743
1744 if (xa_is_value(page))
1745 goto export;
1746
1747 if (!page_cache_get_speculative(page))
1748 goto retry;
1749
1750
1751 if (unlikely(page != xas_reload(&xas)))
1752 goto put_page;
1753 page = find_subpage(page, xas.xa_index);
1754
1755 export:
1756 indices[ret] = xas.xa_index;
1757 entries[ret] = page;
1758 if (++ret == nr_entries)
1759 break;
1760 continue;
1761 put_page:
1762 put_page(page);
1763 retry:
1764 xas_reset(&xas);
1765 }
1766 rcu_read_unlock();
1767 return ret;
1768 }
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791 unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
1792 pgoff_t end, unsigned int nr_pages,
1793 struct page **pages)
1794 {
1795 XA_STATE(xas, &mapping->i_pages, *start);
1796 struct page *page;
1797 unsigned ret = 0;
1798
1799 if (unlikely(!nr_pages))
1800 return 0;
1801
1802 rcu_read_lock();
1803 xas_for_each(&xas, page, end) {
1804 if (xas_retry(&xas, page))
1805 continue;
1806
1807 if (xa_is_value(page))
1808 continue;
1809
1810 if (!page_cache_get_speculative(page))
1811 goto retry;
1812
1813
1814 if (unlikely(page != xas_reload(&xas)))
1815 goto put_page;
1816
1817 pages[ret] = find_subpage(page, xas.xa_index);
1818 if (++ret == nr_pages) {
1819 *start = xas.xa_index + 1;
1820 goto out;
1821 }
1822 continue;
1823 put_page:
1824 put_page(page);
1825 retry:
1826 xas_reset(&xas);
1827 }
1828
1829
1830
1831
1832
1833
1834
1835 if (end == (pgoff_t)-1)
1836 *start = (pgoff_t)-1;
1837 else
1838 *start = end + 1;
1839 out:
1840 rcu_read_unlock();
1841
1842 return ret;
1843 }
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857 unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
1858 unsigned int nr_pages, struct page **pages)
1859 {
1860 XA_STATE(xas, &mapping->i_pages, index);
1861 struct page *page;
1862 unsigned int ret = 0;
1863
1864 if (unlikely(!nr_pages))
1865 return 0;
1866
1867 rcu_read_lock();
1868 for (page = xas_load(&xas); page; page = xas_next(&xas)) {
1869 if (xas_retry(&xas, page))
1870 continue;
1871
1872
1873
1874
1875 if (xa_is_value(page))
1876 break;
1877
1878 if (!page_cache_get_speculative(page))
1879 goto retry;
1880
1881
1882 if (unlikely(page != xas_reload(&xas)))
1883 goto put_page;
1884
1885 pages[ret] = find_subpage(page, xas.xa_index);
1886 if (++ret == nr_pages)
1887 break;
1888 continue;
1889 put_page:
1890 put_page(page);
1891 retry:
1892 xas_reset(&xas);
1893 }
1894 rcu_read_unlock();
1895 return ret;
1896 }
1897 EXPORT_SYMBOL(find_get_pages_contig);
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913 unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
1914 pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
1915 struct page **pages)
1916 {
1917 XA_STATE(xas, &mapping->i_pages, *index);
1918 struct page *page;
1919 unsigned ret = 0;
1920
1921 if (unlikely(!nr_pages))
1922 return 0;
1923
1924 rcu_read_lock();
1925 xas_for_each_marked(&xas, page, end, tag) {
1926 if (xas_retry(&xas, page))
1927 continue;
1928
1929
1930
1931
1932
1933 if (xa_is_value(page))
1934 continue;
1935
1936 if (!page_cache_get_speculative(page))
1937 goto retry;
1938
1939
1940 if (unlikely(page != xas_reload(&xas)))
1941 goto put_page;
1942
1943 pages[ret] = find_subpage(page, xas.xa_index);
1944 if (++ret == nr_pages) {
1945 *index = xas.xa_index + 1;
1946 goto out;
1947 }
1948 continue;
1949 put_page:
1950 put_page(page);
1951 retry:
1952 xas_reset(&xas);
1953 }
1954
1955
1956
1957
1958
1959
1960
1961 if (end == (pgoff_t)-1)
1962 *index = (pgoff_t)-1;
1963 else
1964 *index = end + 1;
1965 out:
1966 rcu_read_unlock();
1967
1968 return ret;
1969 }
1970 EXPORT_SYMBOL(find_get_pages_range_tag);
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987 static void shrink_readahead_size_eio(struct file *filp,
1988 struct file_ra_state *ra)
1989 {
1990 ra->ra_pages /= 4;
1991 }
1992
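/*
 * The buffered read path: walk the page cache for the requested range,
 * triggering synchronous or asynchronous readahead as needed, waiting
 * on not-uptodate pages (or issuing ->readpage when pages are missing),
 * and copying data into @iter. Returns the number of bytes copied, or
 * a negative error if nothing was copied.
 */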
2009 static ssize_t generic_file_buffered_read(struct kiocb *iocb,
2010 struct iov_iter *iter, ssize_t written)
2011 {
2012 struct file *filp = iocb->ki_filp;
2013 struct address_space *mapping = filp->f_mapping;
2014 struct inode *inode = mapping->host;
2015 struct file_ra_state *ra = &filp->f_ra;
2016 loff_t *ppos = &iocb->ki_pos;
2017 pgoff_t index;
2018 pgoff_t last_index;
2019 pgoff_t prev_index;
2020 unsigned long offset;
2021 unsigned int prev_offset;
2022 int error = 0;
2023
2024 if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
2025 return 0;
2026 iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
2027
2028 index = *ppos >> PAGE_SHIFT;
2029 prev_index = ra->prev_pos >> PAGE_SHIFT;
2030 prev_offset = ra->prev_pos & (PAGE_SIZE-1);
2031 last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
2032 offset = *ppos & ~PAGE_MASK;
2033
2034 for (;;) {
2035 struct page *page;
2036 pgoff_t end_index;
2037 loff_t isize;
2038 unsigned long nr, ret;
2039
2040 cond_resched();
2041 find_page:
2042 if (fatal_signal_pending(current)) {
2043 error = -EINTR;
2044 goto out;
2045 }
2046
2047 page = find_get_page(mapping, index);
2048 if (!page) {
2049 if (iocb->ki_flags & IOCB_NOWAIT)
2050 goto would_block;
2051 page_cache_sync_readahead(mapping,
2052 ra, filp,
2053 index, last_index - index);
2054 page = find_get_page(mapping, index);
2055 if (unlikely(page == NULL))
2056 goto no_cached_page;
2057 }
2058 if (PageReadahead(page)) {
2059 page_cache_async_readahead(mapping,
2060 ra, filp, page,
2061 index, last_index - index);
2062 }
2063 if (!PageUptodate(page)) {
2064 if (iocb->ki_flags & IOCB_NOWAIT) {
2065 put_page(page);
2066 goto would_block;
2067 }
2068
2069
2070
2071
2072
2073
2074 error = wait_on_page_locked_killable(page);
2075 if (unlikely(error))
2076 goto readpage_error;
2077 if (PageUptodate(page))
2078 goto page_ok;
2079
2080 if (inode->i_blkbits == PAGE_SHIFT ||
2081 !mapping->a_ops->is_partially_uptodate)
2082 goto page_not_up_to_date;
2083
2084 if (unlikely(iov_iter_is_pipe(iter)))
2085 goto page_not_up_to_date;
2086 if (!trylock_page(page))
2087 goto page_not_up_to_date;
2088
2089 if (!page->mapping)
2090 goto page_not_up_to_date_locked;
2091 if (!mapping->a_ops->is_partially_uptodate(page,
2092 offset, iter->count))
2093 goto page_not_up_to_date_locked;
2094 unlock_page(page);
2095 }
2096 page_ok:
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106 isize = i_size_read(inode);
2107 end_index = (isize - 1) >> PAGE_SHIFT;
2108 if (unlikely(!isize || index > end_index)) {
2109 put_page(page);
2110 goto out;
2111 }
2112
2113
2114 nr = PAGE_SIZE;
2115 if (index == end_index) {
2116 nr = ((isize - 1) & ~PAGE_MASK) + 1;
2117 if (nr <= offset) {
2118 put_page(page);
2119 goto out;
2120 }
2121 }
2122 nr = nr - offset;
2123
2124
2125
2126
2127
2128 if (mapping_writably_mapped(mapping))
2129 flush_dcache_page(page);
2130
2131
2132
2133
2134
2135 if (prev_index != index || offset != prev_offset)
2136 mark_page_accessed(page);
2137 prev_index = index;
2138
2139
2140
2141
2142
2143
2144 ret = copy_page_to_iter(page, offset, nr, iter);
2145 offset += ret;
2146 index += offset >> PAGE_SHIFT;
2147 offset &= ~PAGE_MASK;
2148 prev_offset = offset;
2149
2150 put_page(page);
2151 written += ret;
2152 if (!iov_iter_count(iter))
2153 goto out;
2154 if (ret < nr) {
2155 error = -EFAULT;
2156 goto out;
2157 }
2158 continue;
2159
2160 page_not_up_to_date:
2161
2162 error = lock_page_killable(page);
2163 if (unlikely(error))
2164 goto readpage_error;
2165
2166 page_not_up_to_date_locked:
2167
2168 if (!page->mapping) {
2169 unlock_page(page);
2170 put_page(page);
2171 continue;
2172 }
2173
2174
2175 if (PageUptodate(page)) {
2176 unlock_page(page);
2177 goto page_ok;
2178 }
2179
2180 readpage:
2181
2182
2183
2184
2185
2186 ClearPageError(page);
2187
2188 error = mapping->a_ops->readpage(filp, page);
2189
2190 if (unlikely(error)) {
2191 if (error == AOP_TRUNCATED_PAGE) {
2192 put_page(page);
2193 error = 0;
2194 goto find_page;
2195 }
2196 goto readpage_error;
2197 }
2198
2199 if (!PageUptodate(page)) {
2200 error = lock_page_killable(page);
2201 if (unlikely(error))
2202 goto readpage_error;
2203 if (!PageUptodate(page)) {
2204 if (page->mapping == NULL) {
2205
2206
2207
2208 unlock_page(page);
2209 put_page(page);
2210 goto find_page;
2211 }
2212 unlock_page(page);
2213 shrink_readahead_size_eio(filp, ra);
2214 error = -EIO;
2215 goto readpage_error;
2216 }
2217 unlock_page(page);
2218 }
2219
2220 goto page_ok;
2221
2222 readpage_error:
2223
2224 put_page(page);
2225 goto out;
2226
2227 no_cached_page:
2228
2229
2230
2231
2232 page = page_cache_alloc(mapping);
2233 if (!page) {
2234 error = -ENOMEM;
2235 goto out;
2236 }
2237 error = add_to_page_cache_lru(page, mapping, index,
2238 mapping_gfp_constraint(mapping, GFP_KERNEL));
2239 if (error) {
2240 put_page(page);
2241 if (error == -EEXIST) {
2242 error = 0;
2243 goto find_page;
2244 }
2245 goto out;
2246 }
2247 goto readpage;
2248 }
2249
2250 would_block:
2251 error = -EAGAIN;
2252 out:
2253 ra->prev_pos = prev_index;
2254 ra->prev_pos <<= PAGE_SHIFT;
2255 ra->prev_pos |= prev_offset;
2256
2257 *ppos = ((loff_t)index << PAGE_SHIFT) + offset;
2258 file_accessed(filp);
2259 return written ? written : error;
2260 }
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273 ssize_t
2274 generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
2275 {
2276 size_t count = iov_iter_count(iter);
2277 ssize_t retval = 0;
2278
2279 if (!count)
2280 goto out;
2281
2282 if (iocb->ki_flags & IOCB_DIRECT) {
2283 struct file *file = iocb->ki_filp;
2284 struct address_space *mapping = file->f_mapping;
2285 struct inode *inode = mapping->host;
2286 loff_t size;
2287
2288 size = i_size_read(inode);
2289 if (iocb->ki_flags & IOCB_NOWAIT) {
2290 if (filemap_range_has_page(mapping, iocb->ki_pos,
2291 iocb->ki_pos + count - 1))
2292 return -EAGAIN;
2293 } else {
2294 retval = filemap_write_and_wait_range(mapping,
2295 iocb->ki_pos,
2296 iocb->ki_pos + count - 1);
2297 if (retval < 0)
2298 goto out;
2299 }
2300
2301 file_accessed(file);
2302
2303 retval = mapping->a_ops->direct_IO(iocb, iter);
2304 if (retval >= 0) {
2305 iocb->ki_pos += retval;
2306 count -= retval;
2307 }
2308 iov_iter_revert(iter, count - iov_iter_count(iter));
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319 if (retval < 0 || !count || iocb->ki_pos >= size ||
2320 IS_DAX(inode))
2321 goto out;
2322 }
2323
2324 retval = generic_file_buffered_read(iocb, iter, retval);
2325 out:
2326 return retval;
2327 }
2328 EXPORT_SYMBOL(generic_file_read_iter);
2329
2330 #ifdef CONFIG_MMU
2331 #define MMAP_LOTSAMISS (100)
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343 static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
2344 struct file **fpin)
2345 {
2346 if (trylock_page(page))
2347 return 1;
2348
2349
2350
2351
2352
2353
2354 if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
2355 return 0;
2356
2357 *fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
2358 if (vmf->flags & FAULT_FLAG_KILLABLE) {
2359 if (__lock_page_killable(page)) {
2360
2361
2362
2363
2364
2365
2366 if (*fpin == NULL)
2367 up_read(&vmf->vma->vm_mm->mmap_sem);
2368 return 0;
2369 }
2370 } else
2371 __lock_page(page);
2372 return 1;
2373 }
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383 static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
2384 {
2385 struct file *file = vmf->vma->vm_file;
2386 struct file_ra_state *ra = &file->f_ra;
2387 struct address_space *mapping = file->f_mapping;
2388 struct file *fpin = NULL;
2389 pgoff_t offset = vmf->pgoff;
2390
2391
2392 if (vmf->vma->vm_flags & VM_RAND_READ)
2393 return fpin;
2394 if (!ra->ra_pages)
2395 return fpin;
2396
2397 if (vmf->vma->vm_flags & VM_SEQ_READ) {
2398 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
2399 page_cache_sync_readahead(mapping, ra, file, offset,
2400 ra->ra_pages);
2401 return fpin;
2402 }
2403
2404
2405 if (ra->mmap_miss < MMAP_LOTSAMISS * 10)
2406 ra->mmap_miss++;
2407
2408
2409
2410
2411
2412 if (ra->mmap_miss > MMAP_LOTSAMISS)
2413 return fpin;
2414
2415
2416
2417
2418 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
2419 ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
2420 ra->size = ra->ra_pages;
2421 ra->async_size = ra->ra_pages / 4;
2422 ra_submit(ra, mapping, file);
2423 return fpin;
2424 }
2425
2426
2427
2428
2429
2430
2431 static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
2432 struct page *page)
2433 {
2434 struct file *file = vmf->vma->vm_file;
2435 struct file_ra_state *ra = &file->f_ra;
2436 struct address_space *mapping = file->f_mapping;
2437 struct file *fpin = NULL;
2438 pgoff_t offset = vmf->pgoff;
2439
2440
2441 if (vmf->vma->vm_flags & VM_RAND_READ)
2442 return fpin;
2443 if (ra->mmap_miss > 0)
2444 ra->mmap_miss--;
2445 if (PageReadahead(page)) {
2446 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
2447 page_cache_async_readahead(mapping, ra, file,
2448 page, offset, ra->ra_pages);
2449 }
2450 return fpin;
2451 }
2452
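/*
 * The ->fault handler for files using the page cache. Finds (or reads
 * in) the page covering vmf->pgoff, locks it, and returns it in
 * vmf->page with VM_FAULT_LOCKED set. When I/O is required, mmap_sem
 * may be dropped, in which case VM_FAULT_RETRY is returned so that the
 * caller retries the fault.
 */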
2476 vm_fault_t filemap_fault(struct vm_fault *vmf)
2477 {
2478 int error;
2479 struct file *file = vmf->vma->vm_file;
2480 struct file *fpin = NULL;
2481 struct address_space *mapping = file->f_mapping;
2482 struct file_ra_state *ra = &file->f_ra;
2483 struct inode *inode = mapping->host;
2484 pgoff_t offset = vmf->pgoff;
2485 pgoff_t max_off;
2486 struct page *page;
2487 vm_fault_t ret = 0;
2488
2489 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
2490 if (unlikely(offset >= max_off))
2491 return VM_FAULT_SIGBUS;
2492
2493
2494
2495
2496 page = find_get_page(mapping, offset);
2497 if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
2498
2499
2500
2501
2502 fpin = do_async_mmap_readahead(vmf, page);
2503 } else if (!page) {
2504
2505 count_vm_event(PGMAJFAULT);
2506 count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
2507 ret = VM_FAULT_MAJOR;
2508 fpin = do_sync_mmap_readahead(vmf);
2509 retry_find:
2510 page = pagecache_get_page(mapping, offset,
2511 FGP_CREAT|FGP_FOR_MMAP,
2512 vmf->gfp_mask);
2513 if (!page) {
2514 if (fpin)
2515 goto out_retry;
2516 return vmf_error(-ENOMEM);
2517 }
2518 }
2519
2520 if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
2521 goto out_retry;
2522
2523
2524 if (unlikely(compound_head(page)->mapping != mapping)) {
2525 unlock_page(page);
2526 put_page(page);
2527 goto retry_find;
2528 }
2529 VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
2530
2531
2532
2533
2534
2535 if (unlikely(!PageUptodate(page)))
2536 goto page_not_uptodate;
2537
2538
2539
2540
2541
2542
2543 if (fpin) {
2544 unlock_page(page);
2545 goto out_retry;
2546 }
2547
2548
2549
2550
2551
2552 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
2553 if (unlikely(offset >= max_off)) {
2554 unlock_page(page);
2555 put_page(page);
2556 return VM_FAULT_SIGBUS;
2557 }
2558
2559 vmf->page = page;
2560 return ret | VM_FAULT_LOCKED;
2561
2562 page_not_uptodate:
2563
2564
2565
2566
2567
2568
2569 ClearPageError(page);
2570 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
2571 error = mapping->a_ops->readpage(file, page);
2572 if (!error) {
2573 wait_on_page_locked(page);
2574 if (!PageUptodate(page))
2575 error = -EIO;
2576 }
2577 if (fpin)
2578 goto out_retry;
2579 put_page(page);
2580
2581 if (!error || error == AOP_TRUNCATED_PAGE)
2582 goto retry_find;
2583
2584
2585 shrink_readahead_size_eio(file, ra);
2586 return VM_FAULT_SIGBUS;
2587
2588 out_retry:
2589
2590
2591
2592
2593
2594 if (page)
2595 put_page(page);
2596 if (fpin)
2597 fput(fpin);
2598 return ret | VM_FAULT_RETRY;
2599 }
2600 EXPORT_SYMBOL(filemap_fault);
2601
2602 void filemap_map_pages(struct vm_fault *vmf,
2603 pgoff_t start_pgoff, pgoff_t end_pgoff)
2604 {
2605 struct file *file = vmf->vma->vm_file;
2606 struct address_space *mapping = file->f_mapping;
2607 pgoff_t last_pgoff = start_pgoff;
2608 unsigned long max_idx;
2609 XA_STATE(xas, &mapping->i_pages, start_pgoff);
2610 struct page *page;
2611
2612 rcu_read_lock();
2613 xas_for_each(&xas, page, end_pgoff) {
2614 if (xas_retry(&xas, page))
2615 continue;
2616 if (xa_is_value(page))
2617 goto next;
2618
2619
2620
2621
2622
2623 if (PageLocked(page))
2624 goto next;
2625 if (!page_cache_get_speculative(page))
2626 goto next;
2627
2628
2629 if (unlikely(page != xas_reload(&xas)))
2630 goto skip;
2631 page = find_subpage(page, xas.xa_index);
2632
2633 if (!PageUptodate(page) ||
2634 PageReadahead(page) ||
2635 PageHWPoison(page))
2636 goto skip;
2637 if (!trylock_page(page))
2638 goto skip;
2639
2640 if (page->mapping != mapping || !PageUptodate(page))
2641 goto unlock;
2642
2643 max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
2644 if (page->index >= max_idx)
2645 goto unlock;
2646
2647 if (file->f_ra.mmap_miss > 0)
2648 file->f_ra.mmap_miss--;
2649
2650 vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
2651 if (vmf->pte)
2652 vmf->pte += xas.xa_index - last_pgoff;
2653 last_pgoff = xas.xa_index;
2654 if (alloc_set_pte(vmf, NULL, page))
2655 goto unlock;
2656 unlock_page(page);
2657 goto next;
2658 unlock:
2659 unlock_page(page);
2660 skip:
2661 put_page(page);
2662 next:
2663
2664 if (pmd_trans_huge(*vmf->pmd))
2665 break;
2666 }
2667 rcu_read_unlock();
2668 }
2669 EXPORT_SYMBOL(filemap_map_pages);
2670
2671 vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
2672 {
2673 struct page *page = vmf->page;
2674 struct inode *inode = file_inode(vmf->vma->vm_file);
2675 vm_fault_t ret = VM_FAULT_LOCKED;
2676
2677 sb_start_pagefault(inode->i_sb);
2678 file_update_time(vmf->vma->vm_file);
2679 lock_page(page);
2680 if (page->mapping != inode->i_mapping) {
2681 unlock_page(page);
2682 ret = VM_FAULT_NOPAGE;
2683 goto out;
2684 }
2685
2686
2687
2688
2689
2690 set_page_dirty(page);
2691 wait_for_stable_page(page);
2692 out:
2693 sb_end_pagefault(inode->i_sb);
2694 return ret;
2695 }
2696
2697 const struct vm_operations_struct generic_file_vm_ops = {
2698 .fault = filemap_fault,
2699 .map_pages = filemap_map_pages,
2700 .page_mkwrite = filemap_page_mkwrite,
2701 };
2702
2703
2704
2705 int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
2706 {
2707 struct address_space *mapping = file->f_mapping;
2708
2709 if (!mapping->a_ops->readpage)
2710 return -ENOEXEC;
2711 file_accessed(file);
2712 vma->vm_ops = &generic_file_vm_ops;
2713 return 0;
2714 }
2715
2716
2717
2718
2719 int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
2720 {
2721 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
2722 return -EINVAL;
2723 return generic_file_mmap(file, vma);
2724 }
2725 #else
2726 vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
2727 {
2728 return VM_FAULT_SIGBUS;
2729 }
2730 int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
2731 {
2732 return -ENOSYS;
2733 }
2734 int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
2735 {
2736 return -ENOSYS;
2737 }
2738 #endif
2739
2740 EXPORT_SYMBOL(filemap_page_mkwrite);
2741 EXPORT_SYMBOL(generic_file_mmap);
2742 EXPORT_SYMBOL(generic_file_readonly_mmap);
2743
2744 static struct page *wait_on_page_read(struct page *page)
2745 {
2746 if (!IS_ERR(page)) {
2747 wait_on_page_locked(page);
2748 if (!PageUptodate(page)) {
2749 put_page(page);
2750 page = ERR_PTR(-EIO);
2751 }
2752 }
2753 return page;
2754 }
2755
2756 static struct page *do_read_cache_page(struct address_space *mapping,
2757 pgoff_t index,
2758 int (*filler)(void *, struct page *),
2759 void *data,
2760 gfp_t gfp)
2761 {
2762 struct page *page;
2763 int err;
2764 repeat:
2765 page = find_get_page(mapping, index);
2766 if (!page) {
2767 page = __page_cache_alloc(gfp);
2768 if (!page)
2769 return ERR_PTR(-ENOMEM);
2770 err = add_to_page_cache_lru(page, mapping, index, gfp);
2771 if (unlikely(err)) {
2772 put_page(page);
2773 if (err == -EEXIST)
2774 goto repeat;
2775
2776 return ERR_PTR(err);
2777 }
2778
2779 filler:
2780 if (filler)
2781 err = filler(data, page);
2782 else
2783 err = mapping->a_ops->readpage(data, page);
2784
2785 if (err < 0) {
2786 put_page(page);
2787 return ERR_PTR(err);
2788 }
2789
2790 page = wait_on_page_read(page);
2791 if (IS_ERR(page))
2792 return page;
2793 goto out;
2794 }
2795 if (PageUptodate(page))
2796 goto out;
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829 wait_on_page_locked(page);
2830 if (PageUptodate(page))
2831 goto out;
2832
2833
2834 lock_page(page);
2835
2836
2837 if (!page->mapping) {
2838 unlock_page(page);
2839 put_page(page);
2840 goto repeat;
2841 }
2842
2843
2844 if (PageUptodate(page)) {
2845 unlock_page(page);
2846 goto out;
2847 }
2848 goto filler;
2849
2850 out:
2851 mark_page_accessed(page);
2852 return page;
2853 }
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869 struct page *read_cache_page(struct address_space *mapping,
2870 pgoff_t index,
2871 int (*filler)(void *, struct page *),
2872 void *data)
2873 {
2874 return do_read_cache_page(mapping, index, filler, data,
2875 mapping_gfp_mask(mapping));
2876 }
2877 EXPORT_SYMBOL(read_cache_page);
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892 struct page *read_cache_page_gfp(struct address_space *mapping,
2893 pgoff_t index,
2894 gfp_t gfp)
2895 {
2896 return do_read_cache_page(mapping, index, NULL, NULL, gfp);
2897 }
2898 EXPORT_SYMBOL(read_cache_page_gfp);
2899
2900
2901
2902
2903
2904
2905 static int generic_write_check_limits(struct file *file, loff_t pos,
2906 loff_t *count)
2907 {
2908 struct inode *inode = file->f_mapping->host;
2909 loff_t max_size = inode->i_sb->s_maxbytes;
2910 loff_t limit = rlimit(RLIMIT_FSIZE);
2911
2912 if (limit != RLIM_INFINITY) {
2913 if (pos >= limit) {
2914 send_sig(SIGXFSZ, current, 0);
2915 return -EFBIG;
2916 }
2917 *count = min(*count, limit - pos);
2918 }
2919
2920 if (!(file->f_flags & O_LARGEFILE))
2921 max_size = MAX_NON_LFS;
2922
2923 if (unlikely(pos >= max_size))
2924 return -EFBIG;
2925
2926 *count = min(*count, max_size - pos);
2927
2928 return 0;
2929 }
2930
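/*
 * Perform the generic sanity checks before a write: reject swapfiles,
 * honour O_APPEND by moving ki_pos to EOF, enforce RLIMIT_FSIZE and
 * s_maxbytes via generic_write_check_limits(), and truncate @from
 * accordingly. Returns the number of bytes that may be written, 0 for
 * an empty iterator, or a negative error.
 */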
2938 inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
2939 {
2940 struct file *file = iocb->ki_filp;
2941 struct inode *inode = file->f_mapping->host;
2942 loff_t count;
2943 int ret;
2944
2945 if (IS_SWAPFILE(inode))
2946 return -ETXTBSY;
2947
2948 if (!iov_iter_count(from))
2949 return 0;
2950
2951
2952 if (iocb->ki_flags & IOCB_APPEND)
2953 iocb->ki_pos = i_size_read(inode);
2954
2955 if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
2956 return -EINVAL;
2957
2958 count = iov_iter_count(from);
2959 ret = generic_write_check_limits(file, iocb->ki_pos, &count);
2960 if (ret)
2961 return ret;
2962
2963 iov_iter_truncate(from, count);
2964 return iov_iter_count(from);
2965 }
2966 EXPORT_SYMBOL(generic_write_checks);
2967
2968
2969
2970
2971
2972
2973
2974
2975 int generic_remap_checks(struct file *file_in, loff_t pos_in,
2976 struct file *file_out, loff_t pos_out,
2977 loff_t *req_count, unsigned int remap_flags)
2978 {
2979 struct inode *inode_in = file_in->f_mapping->host;
2980 struct inode *inode_out = file_out->f_mapping->host;
2981 uint64_t count = *req_count;
2982 uint64_t bcount;
2983 loff_t size_in, size_out;
2984 loff_t bs = inode_out->i_sb->s_blocksize;
2985 int ret;
2986
2987 	/* The start of both ranges must be aligned to an fs block. */
2988 if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
2989 return -EINVAL;
2990
2991 	/* Ensure offsets don't wrap. */
2992 if (pos_in + count < pos_in || pos_out + count < pos_out)
2993 return -EINVAL;
2994
2995 size_in = i_size_read(inode_in);
2996 size_out = i_size_read(inode_out);
2997
2998 	/* Dedupe requires both ranges to lie entirely within EOF. */
2999 if ((remap_flags & REMAP_FILE_DEDUP) &&
3000 (pos_in >= size_in || pos_in + count > size_in ||
3001 pos_out >= size_out || pos_out + count > size_out))
3002 return -EINVAL;
3003
3004 	/* Ensure the infile range is within the infile. */
3005 if (pos_in >= size_in)
3006 return -EINVAL;
3007 count = min(count, size_in - (uint64_t)pos_in);
3008
3009 ret = generic_write_check_limits(file_out, pos_out, &count);
3010 if (ret)
3011 return ret;
3012
3013 	/*
3014 	 * If the caller asked to link up to the infile's EOF, round the length
3015 	 * up to the next block boundary for this check.
3016 	 *
3017 	 * Otherwise, make sure the byte count is also block-aligned, having
3018 	 * already confirmed that the starting offsets are.
3019 	 */
3020 if (pos_in + count == size_in) {
3021 bcount = ALIGN(size_in, bs) - pos_in;
3022 } else {
3023 if (!IS_ALIGNED(count, bs))
3024 count = ALIGN_DOWN(count, bs);
3025 bcount = count;
3026 }
3027
3028 	/* Don't allow overlapped cloning within the same file. */
3029 if (inode_in == inode_out &&
3030 pos_out + bcount > pos_in &&
3031 pos_out < pos_in + bcount)
3032 return -EINVAL;
3033
3034 	/*
3035 	 * We shortened the request but the caller can't deal with that, so
3036 	 * bounce the request back to userspace.
3037 	 */
3038 if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
3039 return -EINVAL;
3040
3041 *req_count = count;
3042 return 0;
3043 }
3044
3045 /*
3046  * Performs common checks before doing a file copy/clone
3047  * from @file_in to @file_out: both must be regular files, @file_in
3048  * readable, @file_out writable and not opened with O_APPEND.
3049  */
3050 int generic_file_rw_checks(struct file *file_in, struct file *file_out)
3051 {
3052 struct inode *inode_in = file_inode(file_in);
3053 struct inode *inode_out = file_inode(file_out);
3054
3055
3056 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
3057 return -EISDIR;
3058 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
3059 return -EINVAL;
3060
3061 if (!(file_in->f_mode & FMODE_READ) ||
3062 !(file_out->f_mode & FMODE_WRITE) ||
3063 (file_out->f_flags & O_APPEND))
3064 return -EBADF;
3065
3066 return 0;
3067 }
3068
3069 /*
3070  * Performs necessary checks before doing a file copy.
3071  *
3072  * Can adjust the amount of bytes to copy via the @req_count argument.
3073  * Returns an appropriate error code that the caller should return, or
3074  * zero in case the copy should be allowed.
3075  */
3076 int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
3077 struct file *file_out, loff_t pos_out,
3078 size_t *req_count, unsigned int flags)
3079 {
3080 struct inode *inode_in = file_inode(file_in);
3081 struct inode *inode_out = file_inode(file_out);
3082 uint64_t count = *req_count;
3083 loff_t size_in;
3084 int ret;
3085
3086 ret = generic_file_rw_checks(file_in, file_out);
3087 if (ret)
3088 return ret;
3089
3090 	/* Don't touch certain kinds of inodes */
3091 if (IS_IMMUTABLE(inode_out))
3092 return -EPERM;
3093
3094 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
3095 return -ETXTBSY;
3096
3097 	/* Ensure offsets don't wrap. */
3098 if (pos_in + count < pos_in || pos_out + count < pos_out)
3099 return -EOVERFLOW;
3100
3101 	/* Shorten the copy to EOF */
3102 size_in = i_size_read(inode_in);
3103 if (pos_in >= size_in)
3104 count = 0;
3105 else
3106 count = min(count, size_in - (uint64_t)pos_in);
3107
3108 ret = generic_write_check_limits(file_out, pos_out, &count);
3109 if (ret)
3110 return ret;
3111
3112 	/* Don't allow overlapped copying within the same file. */
3113 if (inode_in == inode_out &&
3114 pos_out + count > pos_in &&
3115 pos_out < pos_in + count)
3116 return -EINVAL;
3117
3118 *req_count = count;
3119 return 0;
3120 }
3121
3122 int pagecache_write_begin(struct file *file, struct address_space *mapping,
3123 loff_t pos, unsigned len, unsigned flags,
3124 struct page **pagep, void **fsdata)
3125 {
3126 const struct address_space_operations *aops = mapping->a_ops;
3127
3128 return aops->write_begin(file, mapping, pos, len, flags,
3129 pagep, fsdata);
3130 }
3131 EXPORT_SYMBOL(pagecache_write_begin);
3132
3133 int pagecache_write_end(struct file *file, struct address_space *mapping,
3134 loff_t pos, unsigned len, unsigned copied,
3135 struct page *page, void *fsdata)
3136 {
3137 const struct address_space_operations *aops = mapping->a_ops;
3138
3139 return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
3140 }
3141 EXPORT_SYMBOL(pagecache_write_end);
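/*
 * Illustrative sketch, not part of this file: copying a small kernel buffer
 * into a file's page cache using the pagecache_write_begin()/
 * pagecache_write_end() pair, the same bracketing generic_perform_write()
 * uses below.  The buffer is assumed not to cross a page boundary, a short
 * return from ->write_end() is not handled, and error handling is minimal.
 * "example_write_kernel_buf" is a hypothetical helper, not a kernel symbol.
 */
static int example_write_kernel_buf(struct file *file, loff_t pos,
				    const void *buf, unsigned int len)
{
	struct address_space *mapping = file->f_mapping;
	struct page *page;
	void *fsdata;
	char *kaddr;
	int ret;

	ret = pagecache_write_begin(file, mapping, pos, len, 0, &page, &fsdata);
	if (ret)
		return ret;

	kaddr = kmap_atomic(page);
	memcpy(kaddr + offset_in_page(pos), buf, len);
	kunmap_atomic(kaddr);
	flush_dcache_page(page);

	/* ->write_end() dirties the page, unlocks it and drops our reference */
	ret = pagecache_write_end(file, mapping, pos, len, len, page, fsdata);
	return ret < 0 ? ret : 0;
}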
3142
3143 ssize_t
3144 generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
3145 {
3146 struct file *file = iocb->ki_filp;
3147 struct address_space *mapping = file->f_mapping;
3148 struct inode *inode = mapping->host;
3149 loff_t pos = iocb->ki_pos;
3150 ssize_t written;
3151 size_t write_len;
3152 pgoff_t end;
3153
3154 write_len = iov_iter_count(from);
3155 end = (pos + write_len - 1) >> PAGE_SHIFT;
3156
3157 if (iocb->ki_flags & IOCB_NOWAIT) {
3158 		/* If there are pages to writeback, return */
3159 if (filemap_range_has_page(inode->i_mapping, pos,
3160 pos + write_len - 1))
3161 return -EAGAIN;
3162 } else {
3163 written = filemap_write_and_wait_range(mapping, pos,
3164 pos + write_len - 1);
3165 if (written)
3166 goto out;
3167 }
3168
3169 	/*
3170 	 * After a write we want buffered reads to be sure to go to disk to get
3171 	 * the new data.  We invalidate clean cached pages from the region we
3172 	 * are about to write.  We do this *before* the write so that we can
3173 	 * return without clobbering -EIOCBQUEUED from ->direct_IO().
3174 	 */
3175 written = invalidate_inode_pages2_range(mapping,
3176 pos >> PAGE_SHIFT, end);
3177
3178 	/*
3179 	 * If a page can not be invalidated, return 0 to fall back to buffered write.
3180 	 */
3181 if (written) {
3182 if (written == -EBUSY)
3183 return 0;
3184 goto out;
3185 }
3186
3187 written = mapping->a_ops->direct_IO(iocb, from);
3188
3189 	/*
3190 	 * Finally, try again to invalidate clean pages which might have been
3191 	 * cached by non-direct readahead, or faulted in by get_user_pages()
3192 	 * if the source of the write was an mmap'ed region of the file
3193 	 * we're writing.  Either one is a pretty crazy thing to do,
3194 	 * so we don't support it 100%.  If this invalidation
3195 	 * fails, tough, the write still worked...
3196 	 *
3197 	 * Most of the time we do not need this since dio_complete() will do
3198 	 * the invalidation for us.  However there are some file systems that
3199 	 * do not end up with dio_complete() being called, so let us not rely
3200 	 * on it solely if we can avoid it.
3201 	 */
3202 if (mapping->nrpages)
3203 invalidate_inode_pages2_range(mapping,
3204 pos >> PAGE_SHIFT, end);
3205
3206 if (written > 0) {
3207 pos += written;
3208 write_len -= written;
3209 if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
3210 i_size_write(inode, pos);
3211 mark_inode_dirty(inode);
3212 }
3213 iocb->ki_pos = pos;
3214 }
3215 iov_iter_revert(from, write_len - iov_iter_count(from));
3216 out:
3217 return written;
3218 }
3219 EXPORT_SYMBOL(generic_file_direct_write);
3220
3221 /*
3222  * Find or create a page at the given pagecache position.  Return the locked
3223  * page.  This function is specifically for buffered writes.
3224  */
3225 struct page *grab_cache_page_write_begin(struct address_space *mapping,
3226 pgoff_t index, unsigned flags)
3227 {
3228 struct page *page;
3229 int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;
3230
3231 if (flags & AOP_FLAG_NOFS)
3232 fgp_flags |= FGP_NOFS;
3233
3234 page = pagecache_get_page(mapping, index, fgp_flags,
3235 mapping_gfp_mask(mapping));
3236 if (page)
3237 wait_for_stable_page(page);
3238
3239 return page;
3240 }
3241 EXPORT_SYMBOL(grab_cache_page_write_begin);
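/*
 * Illustrative sketch, not part of this file: the skeleton of a minimal
 * ->write_begin() built on grab_cache_page_write_begin(), in the spirit of
 * simple_write_begin() in fs/libfs.c.  Zeroing the parts of a new page that
 * the write will not cover is deliberately omitted; a real implementation
 * must handle that.  "example_write_begin" is a hypothetical function, not a
 * kernel symbol.
 */
static int example_write_begin(struct file *file, struct address_space *mapping,
			       loff_t pos, unsigned len, unsigned flags,
			       struct page **pagep, void **fsdata)
{
	struct page *page;

	page = grab_cache_page_write_begin(mapping, pos >> PAGE_SHIFT, flags);
	if (!page)
		return -ENOMEM;

	*pagep = page;		/* locked, referenced, possibly !uptodate */
	return 0;
}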
3242
3243 ssize_t generic_perform_write(struct file *file,
3244 struct iov_iter *i, loff_t pos)
3245 {
3246 struct address_space *mapping = file->f_mapping;
3247 const struct address_space_operations *a_ops = mapping->a_ops;
3248 long status = 0;
3249 ssize_t written = 0;
3250 unsigned int flags = 0;
3251
3252 do {
3253 struct page *page;
3254 unsigned long offset;
3255 unsigned long bytes;
3256 size_t copied;
3257 void *fsdata;
3258
3259 offset = (pos & (PAGE_SIZE - 1));
3260 bytes = min_t(unsigned long, PAGE_SIZE - offset,
3261 iov_iter_count(i));
3262
3263 again:
3264 		/*
3265 		 * Bring in the user page that we will copy from _first_.
3266 		 * Otherwise there's a nasty deadlock on copying from the
3267 		 * same page as we're writing to, without it being marked
3268 		 * up-to-date.
3269 		 *
3270 		 * Not only is this an optimisation, but it is also required
3271 		 * to check that the address is actually valid, when atomic
3272 		 * usercopies are used, below.
3273 		 */
3274 if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
3275 status = -EFAULT;
3276 break;
3277 }
3278
3279 if (fatal_signal_pending(current)) {
3280 status = -EINTR;
3281 break;
3282 }
3283
3284 status = a_ops->write_begin(file, mapping, pos, bytes, flags,
3285 &page, &fsdata);
3286 if (unlikely(status < 0))
3287 break;
3288
3289 if (mapping_writably_mapped(mapping))
3290 flush_dcache_page(page);
3291
3292 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
3293 flush_dcache_page(page);
3294
3295 status = a_ops->write_end(file, mapping, pos, bytes, copied,
3296 page, fsdata);
3297 if (unlikely(status < 0))
3298 break;
3299 copied = status;
3300
3301 cond_resched();
3302
3303 iov_iter_advance(i, copied);
3304 if (unlikely(copied == 0)) {
3305 			/*
3306 			 * If we were unable to copy any data at all, we must
3307 			 * fall back to a single segment length write.
3308 			 *
3309 			 * If we didn't fallback here, we could livelock
3310 			 * because not all segments in the iov can be copied
3311 			 * at once without a pagefault.
3312 			 */
3313 bytes = min_t(unsigned long, PAGE_SIZE - offset,
3314 iov_iter_single_seg_count(i));
3315 goto again;
3316 }
3317 pos += copied;
3318 written += copied;
3319
3320 balance_dirty_pages_ratelimited(mapping);
3321 } while (iov_iter_count(i));
3322
3323 return written ? written : status;
3324 }
3325 EXPORT_SYMBOL(generic_perform_write);
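/*
 * Illustrative sketch, not part of this file: a buffered-only ->write_iter()
 * that drives generic_perform_write() directly instead of going through the
 * direct-I/O branch of __generic_file_write_iter() below.  Privilege
 * stripping (file_remove_privs()) and timestamp updates (file_update_time())
 * are omitted for brevity; a real implementation needs them.
 * "example_buffered_write_iter" is a hypothetical function, not a kernel
 * symbol.
 */
static ssize_t example_buffered_write_iter(struct kiocb *iocb,
					   struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	ssize_t ret;

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret > 0) {
		/* let page reclaim write back through this backing device */
		current->backing_dev_info = inode_to_bdi(inode);
		ret = generic_perform_write(file, from, iocb->ki_pos);
		if (ret > 0)
			iocb->ki_pos += ret;
		current->backing_dev_info = NULL;
	}
	inode_unlock(inode);

	return ret;
}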
3326
3327 /**
3328  * __generic_file_write_iter - write data to a file
3329  * @iocb:	IO state structure (file, offset, etc.)
3330  * @from:	iov_iter with data to write
3331  *
3332  * This function does all the work needed for actually writing data to a
3333  * file.  It does all basic checks, removes SUID from the file, updates
3334  * modification times and calls the proper subroutines depending on whether
3335  * we do direct IO or a standard buffered write.
3336  *
3337  * It expects i_mutex to be grabbed unless we work on a block device or
3338  * similar object which does not need locking at all.
3339  *
3340  * This function does *not* take care of syncing data in case of O_SYNC
3341  * write.  A caller has to handle it.  This is mainly due to the fact that
3342  * we want to avoid syncing under i_mutex.
3343  *
3344  * Return:
3345  * * number of bytes written, even for truncated writes
3346  * * negative error code if no data has been written at all
3347  */
3348 ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
3349 {
3350 struct file *file = iocb->ki_filp;
3351 struct address_space * mapping = file->f_mapping;
3352 struct inode *inode = mapping->host;
3353 ssize_t written = 0;
3354 ssize_t err;
3355 ssize_t status;
3356
3357 	/* We can write back this queue in page reclaim */
3358 current->backing_dev_info = inode_to_bdi(inode);
3359 err = file_remove_privs(file);
3360 if (err)
3361 goto out;
3362
3363 err = file_update_time(file);
3364 if (err)
3365 goto out;
3366
3367 if (iocb->ki_flags & IOCB_DIRECT) {
3368 loff_t pos, endbyte;
3369
3370 written = generic_file_direct_write(iocb, from);
3371
3372 		/*
3373 		 * If the write stopped short of completing, fall back to
3374 		 * buffered writes.  Some filesystems do this for writes to
3375 		 * holes, for example.  For DAX files a buffered write will
3376 		 * not succeed, so bail out for DAX as well.
3377 		 */
3378 if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
3379 goto out;
3380
3381 status = generic_perform_write(file, from, pos = iocb->ki_pos);
3382
3383 		/*
3384 		 * If generic_perform_write() returned a synchronous error, return
3385 		 * the number of bytes already direct-written, or that error if
3386 		 * nothing was written.  This differs from the usual direct-io
3387 		 * semantics, which return -EFOO even if some bytes were written.
3388 		 */
3389 if (unlikely(status < 0)) {
3390 err = status;
3391 goto out;
3392 }
3393
3394 		/*
3395 		 * We need to ensure that the page cache pages are written to
3396 		 * disk and invalidated to preserve the expected O_DIRECT semantics.
3397 		 */
3398 endbyte = pos + status - 1;
3399 err = filemap_write_and_wait_range(mapping, pos, endbyte);
3400 if (err == 0) {
3401 iocb->ki_pos = endbyte + 1;
3402 written += status;
3403 invalidate_mapping_pages(mapping,
3404 pos >> PAGE_SHIFT,
3405 endbyte >> PAGE_SHIFT);
3406 } else {
3407 			/*
3408 			 * We don't know how much we wrote, so just return
3409 			 * the number of bytes which were direct-written.
3410 			 */
3411 }
3412 } else {
3413 written = generic_perform_write(file, from, iocb->ki_pos);
3414 if (likely(written > 0))
3415 iocb->ki_pos += written;
3416 }
3417 out:
3418 current->backing_dev_info = NULL;
3419 return written ? written : err;
3420 }
3421 EXPORT_SYMBOL(__generic_file_write_iter);
3422
3423 /**
3424  * generic_file_write_iter - write data to a file
3425  * @iocb:	IO state structure
3426  * @from:	iov_iter with data to write
3427  *
3428  * This is a wrapper around __generic_file_write_iter() to be used by most
3429  * filesystems.  It takes care of syncing the file in case of O_SYNC file
3430  * and acquires i_mutex as needed.
3431  * Return:
3432  * * negative error code if no data has been written at all or if
3433  *   vfs_fsync_range() failed for a synchronous write
3434  * * number of bytes written, even for truncated writes
3435  */
3436 ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
3437 {
3438 struct file *file = iocb->ki_filp;
3439 struct inode *inode = file->f_mapping->host;
3440 ssize_t ret;
3441
3442 inode_lock(inode);
3443 ret = generic_write_checks(iocb, from);
3444 if (ret > 0)
3445 ret = __generic_file_write_iter(iocb, from);
3446 inode_unlock(inode);
3447
3448 if (ret > 0)
3449 ret = generic_write_sync(iocb, ret);
3450 return ret;
3451 }
3452 EXPORT_SYMBOL(generic_file_write_iter);
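/*
 * Illustrative sketch, not part of this file: an in-memory or simple
 * filesystem can often use the generic iterators directly, much as ramfs
 * does.  "example_file_operations" is a hypothetical instance, not a kernel
 * symbol.
 */
static const struct file_operations example_file_operations = {
	.read_iter	= generic_file_read_iter,
	.write_iter	= generic_file_write_iter,
	.mmap		= generic_file_mmap,
	.fsync		= noop_fsync,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.llseek		= generic_file_llseek,
};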
3453
3454 /**
3455  * try_to_release_page() - release old fs-specific metadata on a page
3456  * @page: the page which the kernel is trying to free
3457  * @gfp_mask: memory allocation flags (and I/O mode)
3458  *
3459  * The address_space is asked to release any data held against the page
3460  * (presumably at page->private).
3461  *
3462  * This may also be called if PG_fscache is set on a page, indicating that
3463  * the page is known to the local caching routines.
3464  *
3465  * The @gfp_mask argument specifies whether I/O may be performed to release
3466  * this page (__GFP_IO), and whether the call may block
3467  * (__GFP_RECLAIM & __GFP_FS).
3468  *
3469  * Return: %1 if the release was successful, otherwise %0.
3470  */
3471 int try_to_release_page(struct page *page, gfp_t gfp_mask)
3472 {
3473 struct address_space * const mapping = page->mapping;
3474
3475 BUG_ON(!PageLocked(page));
3476 if (PageWriteback(page))
3477 return 0;
3478
3479 if (mapping && mapping->a_ops->releasepage)
3480 return mapping->a_ops->releasepage(page, gfp_mask);
3481 return try_to_free_buffers(page);
3482 }
3483
3484 EXPORT_SYMBOL(try_to_release_page);
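/*
 * Illustrative sketch, not part of this file: a buffer_head based filesystem
 * usually leaves ->releasepage NULL and relies on the try_to_free_buffers()
 * fallback above; a filesystem that attaches its own private data supplies a
 * hook along these lines.  "example_releasepage" and "example_aops" are
 * hypothetical, not kernel symbols.
 */
static int example_releasepage(struct page *page, gfp_t gfp_mask)
{
	/* private state still attached: tell the caller the page can't go */
	if (PagePrivate(page))
		return 0;

	return 1;	/* nothing of ours is attached; the page may be freed */
}

static const struct address_space_operations example_aops = {
	.releasepage	= example_releasepage,
};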