This source file includes the following definitions:
- touch_buffer
- __lock_buffer
- unlock_buffer
- buffer_check_dirty_writeback
- __wait_on_buffer
- __clear_page_buffers
- buffer_io_error
- __end_buffer_read_notouch
- end_buffer_read_sync
- end_buffer_write_sync
- __find_get_block_slow
- end_buffer_async_read
- end_buffer_async_write
- mark_buffer_async_read
- mark_buffer_async_write_endio
- mark_buffer_async_write
- __remove_assoc_queue
- inode_has_buffers
- osync_buffers_list
- emergency_thaw_bdev
- sync_mapping_buffers
- write_boundary_block
- mark_buffer_dirty_inode
- __set_page_dirty
- __set_page_dirty_buffers
- fsync_buffers_list
- invalidate_inode_buffers
- remove_inode_buffers
- alloc_page_buffers
- link_dev_buffers
- blkdev_max_block
- init_page_buffers
- grow_dev_page
- grow_buffers
- __getblk_slow
- mark_buffer_dirty
- mark_buffer_write_io_error
- __brelse
- __bforget
- __bread_slow
- check_irqs_on
- bh_lru_install
- lookup_bh_lru
- __find_get_block
- __getblk_gfp
- __breadahead
- __breadahead_gfp
- __bread_gfp
- invalidate_bh_lru
- has_bh_in_lru
- invalidate_bh_lrus
- set_bh_page
- discard_buffer
- block_invalidatepage
- create_empty_buffers
- clean_bdev_aliases
- block_size_bits
- create_page_buffers
- __block_write_full_page
- page_zero_new_buffers
- iomap_to_bh
- __block_write_begin_int
- __block_write_begin
- __block_commit_write
- block_write_begin
- block_write_end
- generic_write_end
- block_is_partially_uptodate
- block_read_full_page
- generic_cont_expand_simple
- cont_expand_zero
- cont_write_begin
- block_commit_write
- block_page_mkwrite
- end_buffer_read_nobh
- attach_nobh_buffers
- nobh_write_begin
- nobh_write_end
- nobh_writepage
- nobh_truncate_page
- block_truncate_page
- block_write_full_page
- generic_block_bmap
- end_bio_bh_io_sync
- guard_bio_eod
- submit_bh_wbc
- submit_bh
- ll_rw_block
- write_dirty_buffer
- __sync_dirty_buffer
- sync_dirty_buffer
- buffer_busy
- drop_buffers
- try_to_free_buffers
- SYSCALL_DEFINE2
- recalc_bh_state
- alloc_buffer_head
- free_buffer_head
- buffer_exit_cpu_dead
- bh_uptodate_or_lock
- bh_submit_read
- buffer_init
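
The functions above make up the kernel's buffer-head layer. As a quick orientation (this is a sketch, not code from the file below), the usual read-modify-write pattern a filesystem builds from these exports looks roughly like the following; sb_bread() is the <linux/buffer_head.h> wrapper around __bread_gfp(), and the superblock and block number are assumed to come from the caller:

static int example_patch_block(struct super_block *sb, sector_t blocknr)
{
	struct buffer_head *bh;
	int err;

	bh = sb_bread(sb, blocknr);	/* read (or find) the cached block; takes a reference */
	if (!bh)
		return -EIO;

	lock_buffer(bh);
	memset(bh->b_data, 0, bh->b_size);	/* modify the in-memory copy */
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	mark_buffer_dirty(bh);		/* hand the block to write-back */
	err = sync_dirty_buffer(bh);	/* optional: force it to disk now */

	brelse(bh);			/* drop our reference */
	return err;
}

sync_dirty_buffer() is only needed when the caller must know the block is on disk before continuing; otherwise stopping at mark_buffer_dirty() and letting normal write-back handle it is the common choice.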
22 #include <linux/kernel.h>
23 #include <linux/sched/signal.h>
24 #include <linux/syscalls.h>
25 #include <linux/fs.h>
26 #include <linux/iomap.h>
27 #include <linux/mm.h>
28 #include <linux/percpu.h>
29 #include <linux/slab.h>
30 #include <linux/capability.h>
31 #include <linux/blkdev.h>
32 #include <linux/file.h>
33 #include <linux/quotaops.h>
34 #include <linux/highmem.h>
35 #include <linux/export.h>
36 #include <linux/backing-dev.h>
37 #include <linux/writeback.h>
38 #include <linux/hash.h>
39 #include <linux/suspend.h>
40 #include <linux/buffer_head.h>
41 #include <linux/task_io_accounting_ops.h>
42 #include <linux/bio.h>
43 #include <linux/cpu.h>
44 #include <linux/bitops.h>
45 #include <linux/mpage.h>
46 #include <linux/bit_spinlock.h>
47 #include <linux/pagevec.h>
48 #include <linux/sched/mm.h>
49 #include <trace/events/block.h>
50
51 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
52 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
53 enum rw_hint hint, struct writeback_control *wbc);
54
55 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
56
57 inline void touch_buffer(struct buffer_head *bh)
58 {
59 trace_block_touch_buffer(bh);
60 mark_page_accessed(bh->b_page);
61 }
62 EXPORT_SYMBOL(touch_buffer);
63
64 void __lock_buffer(struct buffer_head *bh)
65 {
66 wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
67 }
68 EXPORT_SYMBOL(__lock_buffer);
69
70 void unlock_buffer(struct buffer_head *bh)
71 {
72 clear_bit_unlock(BH_Lock, &bh->b_state);
73 smp_mb__after_atomic();
74 wake_up_bit(&bh->b_state, BH_Lock);
75 }
76 EXPORT_SYMBOL(unlock_buffer);
77
78
79
80
81
82
83 void buffer_check_dirty_writeback(struct page *page,
84 bool *dirty, bool *writeback)
85 {
86 struct buffer_head *head, *bh;
87 *dirty = false;
88 *writeback = false;
89
90 BUG_ON(!PageLocked(page));
91
92 if (!page_has_buffers(page))
93 return;
94
95 if (PageWriteback(page))
96 *writeback = true;
97
98 head = page_buffers(page);
99 bh = head;
100 do {
101 if (buffer_locked(bh))
102 *writeback = true;
103
104 if (buffer_dirty(bh))
105 *dirty = true;
106
107 bh = bh->b_this_page;
108 } while (bh != head);
109 }
110 EXPORT_SYMBOL(buffer_check_dirty_writeback);
111
112
113
114
115
116
117 void __wait_on_buffer(struct buffer_head * bh)
118 {
119 wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
120 }
121 EXPORT_SYMBOL(__wait_on_buffer);
122
123 static void
124 __clear_page_buffers(struct page *page)
125 {
126 ClearPagePrivate(page);
127 set_page_private(page, 0);
128 put_page(page);
129 }
130
131 static void buffer_io_error(struct buffer_head *bh, char *msg)
132 {
133 if (!test_bit(BH_Quiet, &bh->b_state))
134 printk_ratelimited(KERN_ERR
135 "Buffer I/O error on dev %pg, logical block %llu%s\n",
136 bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
137 }
138
139
140
141
142
143
144
145
146
147 static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
148 {
149 if (uptodate) {
150 set_buffer_uptodate(bh);
151 } else {
152
153 clear_buffer_uptodate(bh);
154 }
155 unlock_buffer(bh);
156 }
157
158
159
160
161
162 void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
163 {
164 __end_buffer_read_notouch(bh, uptodate);
165 put_bh(bh);
166 }
167 EXPORT_SYMBOL(end_buffer_read_sync);
168
169 void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
170 {
171 if (uptodate) {
172 set_buffer_uptodate(bh);
173 } else {
174 buffer_io_error(bh, ", lost sync page write");
175 mark_buffer_write_io_error(bh);
176 clear_buffer_uptodate(bh);
177 }
178 unlock_buffer(bh);
179 put_bh(bh);
180 }
181 EXPORT_SYMBOL(end_buffer_write_sync);
182
183
184
185
186
187
188
189
190
191
192
193 static struct buffer_head *
194 __find_get_block_slow(struct block_device *bdev, sector_t block)
195 {
196 struct inode *bd_inode = bdev->bd_inode;
197 struct address_space *bd_mapping = bd_inode->i_mapping;
198 struct buffer_head *ret = NULL;
199 pgoff_t index;
200 struct buffer_head *bh;
201 struct buffer_head *head;
202 struct page *page;
203 int all_mapped = 1;
204 static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
205
206 index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
207 page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
208 if (!page)
209 goto out;
210
211 spin_lock(&bd_mapping->private_lock);
212 if (!page_has_buffers(page))
213 goto out_unlock;
214 head = page_buffers(page);
215 bh = head;
216 do {
217 if (!buffer_mapped(bh))
218 all_mapped = 0;
219 else if (bh->b_blocknr == block) {
220 ret = bh;
221 get_bh(bh);
222 goto out_unlock;
223 }
224 bh = bh->b_this_page;
225 } while (bh != head);
226
227
228
229
230
231
232 ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
233 if (all_mapped && __ratelimit(&last_warned)) {
234 printk("__find_get_block_slow() failed. block=%llu, "
235 "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
236 "device %pg blocksize: %d\n",
237 (unsigned long long)block,
238 (unsigned long long)bh->b_blocknr,
239 bh->b_state, bh->b_size, bdev,
240 1 << bd_inode->i_blkbits);
241 }
242 out_unlock:
243 spin_unlock(&bd_mapping->private_lock);
244 put_page(page);
245 out:
246 return ret;
247 }
248
249
250
251
252
253 static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
254 {
255 unsigned long flags;
256 struct buffer_head *first;
257 struct buffer_head *tmp;
258 struct page *page;
259 int page_uptodate = 1;
260
261 BUG_ON(!buffer_async_read(bh));
262
263 page = bh->b_page;
264 if (uptodate) {
265 set_buffer_uptodate(bh);
266 } else {
267 clear_buffer_uptodate(bh);
268 buffer_io_error(bh, ", async page read");
269 SetPageError(page);
270 }
271
272
273
274
275
276
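/*
 * All of the page's buffers are checked under one lock: the BH_Uptodate_Lock
 * bit spinlock on the first buffer serialises the per-buffer completion
 * handlers, so only the last buffer to complete marks the page uptodate and
 * unlocks it.
 */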
277 first = page_buffers(page);
278 local_irq_save(flags);
279 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
280 clear_buffer_async_read(bh);
281 unlock_buffer(bh);
282 tmp = bh;
283 do {
284 if (!buffer_uptodate(tmp))
285 page_uptodate = 0;
286 if (buffer_async_read(tmp)) {
287 BUG_ON(!buffer_locked(tmp));
288 goto still_busy;
289 }
290 tmp = tmp->b_this_page;
291 } while (tmp != bh);
292 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
293 local_irq_restore(flags);
294
295
296
297
298
299 if (page_uptodate && !PageError(page))
300 SetPageUptodate(page);
301 unlock_page(page);
302 return;
303
304 still_busy:
305 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
306 local_irq_restore(flags);
307 return;
308 }
309
310
311
312
313
314 void end_buffer_async_write(struct buffer_head *bh, int uptodate)
315 {
316 unsigned long flags;
317 struct buffer_head *first;
318 struct buffer_head *tmp;
319 struct page *page;
320
321 BUG_ON(!buffer_async_write(bh));
322
323 page = bh->b_page;
324 if (uptodate) {
325 set_buffer_uptodate(bh);
326 } else {
327 buffer_io_error(bh, ", lost async page write");
328 mark_buffer_write_io_error(bh);
329 clear_buffer_uptodate(bh);
330 SetPageError(page);
331 }
332
333 first = page_buffers(page);
334 local_irq_save(flags);
335 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
336
337 clear_buffer_async_write(bh);
338 unlock_buffer(bh);
339 tmp = bh->b_this_page;
340 while (tmp != bh) {
341 if (buffer_async_write(tmp)) {
342 BUG_ON(!buffer_locked(tmp));
343 goto still_busy;
344 }
345 tmp = tmp->b_this_page;
346 }
347 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
348 local_irq_restore(flags);
349 end_page_writeback(page);
350 return;
351
352 still_busy:
353 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
354 local_irq_restore(flags);
355 return;
356 }
357 EXPORT_SYMBOL(end_buffer_async_write);
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380 static void mark_buffer_async_read(struct buffer_head *bh)
381 {
382 bh->b_end_io = end_buffer_async_read;
383 set_buffer_async_read(bh);
384 }
385
386 static void mark_buffer_async_write_endio(struct buffer_head *bh,
387 bh_end_io_t *handler)
388 {
389 bh->b_end_io = handler;
390 set_buffer_async_write(bh);
391 }
392
393 void mark_buffer_async_write(struct buffer_head *bh)
394 {
395 mark_buffer_async_write_endio(bh, end_buffer_async_write);
396 }
397 EXPORT_SYMBOL(mark_buffer_async_write);
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452 static void __remove_assoc_queue(struct buffer_head *bh)
453 {
454 list_del_init(&bh->b_assoc_buffers);
455 WARN_ON(!bh->b_assoc_map);
456 bh->b_assoc_map = NULL;
457 }
458
459 int inode_has_buffers(struct inode *inode)
460 {
461 return !list_empty(&inode->i_data.private_list);
462 }
463
464
465
466
467
468
469
470
471
472
473
474 static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
475 {
476 struct buffer_head *bh;
477 struct list_head *p;
478 int err = 0;
479
480 spin_lock(lock);
481 repeat:
482 list_for_each_prev(p, list) {
483 bh = BH_ENTRY(p);
484 if (buffer_locked(bh)) {
485 get_bh(bh);
486 spin_unlock(lock);
487 wait_on_buffer(bh);
488 if (!buffer_uptodate(bh))
489 err = -EIO;
490 brelse(bh);
491 spin_lock(lock);
492 goto repeat;
493 }
494 }
495 spin_unlock(lock);
496 return err;
497 }
498
499 void emergency_thaw_bdev(struct super_block *sb)
500 {
501 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
502 printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
503 }
504
505
506
507
508
509
510
511
512
513
514
515
516 int sync_mapping_buffers(struct address_space *mapping)
517 {
518 struct address_space *buffer_mapping = mapping->private_data;
519
520 if (buffer_mapping == NULL || list_empty(&mapping->private_list))
521 return 0;
522
523 return fsync_buffers_list(&buffer_mapping->private_lock,
524 &mapping->private_list);
525 }
526 EXPORT_SYMBOL(sync_mapping_buffers);
527
528
529
530
531
532
533
534 void write_boundary_block(struct block_device *bdev,
535 sector_t bblock, unsigned blocksize)
536 {
537 struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
538 if (bh) {
539 if (buffer_dirty(bh))
540 ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
541 put_bh(bh);
542 }
543 }
544
545 void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
546 {
547 struct address_space *mapping = inode->i_mapping;
548 struct address_space *buffer_mapping = bh->b_page->mapping;
549
550 mark_buffer_dirty(bh);
551 if (!mapping->private_data) {
552 mapping->private_data = buffer_mapping;
553 } else {
554 BUG_ON(mapping->private_data != buffer_mapping);
555 }
556 if (!bh->b_assoc_map) {
557 spin_lock(&buffer_mapping->private_lock);
558 list_move_tail(&bh->b_assoc_buffers,
559 &mapping->private_list);
560 bh->b_assoc_map = mapping;
561 spin_unlock(&buffer_mapping->private_lock);
562 }
563 }
564 EXPORT_SYMBOL(mark_buffer_dirty_inode);
565
566
567
568
569
570
571
572
573
574
575 void __set_page_dirty(struct page *page, struct address_space *mapping,
576 int warn)
577 {
578 unsigned long flags;
579
580 xa_lock_irqsave(&mapping->i_pages, flags);
581 if (page->mapping) {
582 WARN_ON_ONCE(warn && !PageUptodate(page));
583 account_page_dirtied(page, mapping);
584 __xa_set_mark(&mapping->i_pages, page_index(page),
585 PAGECACHE_TAG_DIRTY);
586 }
587 xa_unlock_irqrestore(&mapping->i_pages, flags);
588 }
589 EXPORT_SYMBOL_GPL(__set_page_dirty);
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616 int __set_page_dirty_buffers(struct page *page)
617 {
618 int newly_dirty;
619 struct address_space *mapping = page_mapping(page);
620
621 if (unlikely(!mapping))
622 return !TestSetPageDirty(page);
623
624 spin_lock(&mapping->private_lock);
625 if (page_has_buffers(page)) {
626 struct buffer_head *head = page_buffers(page);
627 struct buffer_head *bh = head;
628
629 do {
630 set_buffer_dirty(bh);
631 bh = bh->b_this_page;
632 } while (bh != head);
633 }
634
635
636
637
638 lock_page_memcg(page);
639 newly_dirty = !TestSetPageDirty(page);
640 spin_unlock(&mapping->private_lock);
641
642 if (newly_dirty)
643 __set_page_dirty(page, mapping, 1);
644
645 unlock_page_memcg(page);
646
647 if (newly_dirty)
648 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
649
650 return newly_dirty;
651 }
652 EXPORT_SYMBOL(__set_page_dirty_buffers);
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
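/*
 * fsync_buffers_list() writes out and waits on the dirty buffers attached to
 * an inode's private_list (see mark_buffer_dirty_inode()), then calls
 * osync_buffers_list() to also wait on buffers someone else wrote meanwhile.
 */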
673 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
674 {
675 struct buffer_head *bh;
676 struct list_head tmp;
677 struct address_space *mapping;
678 int err = 0, err2;
679 struct blk_plug plug;
680
681 INIT_LIST_HEAD(&tmp);
682 blk_start_plug(&plug);
683
684 spin_lock(lock);
685 while (!list_empty(list)) {
686 bh = BH_ENTRY(list->next);
687 mapping = bh->b_assoc_map;
688 __remove_assoc_queue(bh);
689
690
691 smp_mb();
692 if (buffer_dirty(bh) || buffer_locked(bh)) {
693 list_add(&bh->b_assoc_buffers, &tmp);
694 bh->b_assoc_map = mapping;
695 if (buffer_dirty(bh)) {
696 get_bh(bh);
697 spin_unlock(lock);
698
699
700
701
702
703
704
705 write_dirty_buffer(bh, REQ_SYNC);
706
707
708
709
710
711
712
713 brelse(bh);
714 spin_lock(lock);
715 }
716 }
717 }
718
719 spin_unlock(lock);
720 blk_finish_plug(&plug);
721 spin_lock(lock);
722
723 while (!list_empty(&tmp)) {
724 bh = BH_ENTRY(tmp.prev);
725 get_bh(bh);
726 mapping = bh->b_assoc_map;
727 __remove_assoc_queue(bh);
728
729
730 smp_mb();
731 if (buffer_dirty(bh)) {
732 list_add(&bh->b_assoc_buffers,
733 &mapping->private_list);
734 bh->b_assoc_map = mapping;
735 }
736 spin_unlock(lock);
737 wait_on_buffer(bh);
738 if (!buffer_uptodate(bh))
739 err = -EIO;
740 brelse(bh);
741 spin_lock(lock);
742 }
743
744 spin_unlock(lock);
745 err2 = osync_buffers_list(lock, list);
746 if (err)
747 return err;
748 else
749 return err2;
750 }
751
752
753
754
755
756
757
758
759
760
761 void invalidate_inode_buffers(struct inode *inode)
762 {
763 if (inode_has_buffers(inode)) {
764 struct address_space *mapping = &inode->i_data;
765 struct list_head *list = &mapping->private_list;
766 struct address_space *buffer_mapping = mapping->private_data;
767
768 spin_lock(&buffer_mapping->private_lock);
769 while (!list_empty(list))
770 __remove_assoc_queue(BH_ENTRY(list->next));
771 spin_unlock(&buffer_mapping->private_lock);
772 }
773 }
774 EXPORT_SYMBOL(invalidate_inode_buffers);
775
776
777
778
779
780
781
782 int remove_inode_buffers(struct inode *inode)
783 {
784 int ret = 1;
785
786 if (inode_has_buffers(inode)) {
787 struct address_space *mapping = &inode->i_data;
788 struct list_head *list = &mapping->private_list;
789 struct address_space *buffer_mapping = mapping->private_data;
790
791 spin_lock(&buffer_mapping->private_lock);
792 while (!list_empty(list)) {
793 struct buffer_head *bh = BH_ENTRY(list->next);
794 if (buffer_dirty(bh)) {
795 ret = 0;
796 break;
797 }
798 __remove_assoc_queue(bh);
799 }
800 spin_unlock(&buffer_mapping->private_lock);
801 }
802 return ret;
803 }
804
805
806
807
808
809
810
811
812
813
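/*
 * alloc_page_buffers() allocates one buffer_head per 'size'-byte block of the
 * page, linked through b_this_page with the head covering offset 0.  The
 * allocations are charged to the page's memcg; 'retry' turns on __GFP_NOFAIL
 * so the call cannot fail.
 */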
814 struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
815 bool retry)
816 {
817 struct buffer_head *bh, *head;
818 gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
819 long offset;
820 struct mem_cgroup *memcg;
821
822 if (retry)
823 gfp |= __GFP_NOFAIL;
824
825 memcg = get_mem_cgroup_from_page(page);
826 memalloc_use_memcg(memcg);
827
828 head = NULL;
829 offset = PAGE_SIZE;
830 while ((offset -= size) >= 0) {
831 bh = alloc_buffer_head(gfp);
832 if (!bh)
833 goto no_grow;
834
835 bh->b_this_page = head;
836 bh->b_blocknr = -1;
837 head = bh;
838
839 bh->b_size = size;
840
841
842 set_bh_page(bh, page, offset);
843 }
844 out:
845 memalloc_unuse_memcg();
846 mem_cgroup_put(memcg);
847 return head;
848
849
850
851 no_grow:
852 if (head) {
853 do {
854 bh = head;
855 head = head->b_this_page;
856 free_buffer_head(bh);
857 } while (head);
858 }
859
860 goto out;
861 }
862 EXPORT_SYMBOL_GPL(alloc_page_buffers);
863
864 static inline void
865 link_dev_buffers(struct page *page, struct buffer_head *head)
866 {
867 struct buffer_head *bh, *tail;
868
869 bh = head;
870 do {
871 tail = bh;
872 bh = bh->b_this_page;
873 } while (bh);
874 tail->b_this_page = head;
875 attach_page_buffers(page, head);
876 }
877
878 static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
879 {
880 sector_t retval = ~((sector_t)0);
881 loff_t sz = i_size_read(bdev->bd_inode);
882
883 if (sz) {
884 unsigned int sizebits = blksize_bits(size);
885 retval = (sz >> sizebits);
886 }
887 return retval;
888 }
889
890
891
892
893 static sector_t
894 init_page_buffers(struct page *page, struct block_device *bdev,
895 sector_t block, int size)
896 {
897 struct buffer_head *head = page_buffers(page);
898 struct buffer_head *bh = head;
899 int uptodate = PageUptodate(page);
900 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
901
902 do {
903 if (!buffer_mapped(bh)) {
904 bh->b_end_io = NULL;
905 bh->b_private = NULL;
906 bh->b_bdev = bdev;
907 bh->b_blocknr = block;
908 if (uptodate)
909 set_buffer_uptodate(bh);
910 if (block < end_block)
911 set_buffer_mapped(bh);
912 }
913 block++;
914 bh = bh->b_this_page;
915 } while (bh != head);
916
917
918
919
920 return end_block;
921 }
922
923
924
925
926
927
928 static int
929 grow_dev_page(struct block_device *bdev, sector_t block,
930 pgoff_t index, int size, int sizebits, gfp_t gfp)
931 {
932 struct inode *inode = bdev->bd_inode;
933 struct page *page;
934 struct buffer_head *bh;
935 sector_t end_block;
936 int ret = 0;
937 gfp_t gfp_mask;
938
939 gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
940
941
942
943
944
945
946
947 gfp_mask |= __GFP_NOFAIL;
948
949 page = find_or_create_page(inode->i_mapping, index, gfp_mask);
950
951 BUG_ON(!PageLocked(page));
952
953 if (page_has_buffers(page)) {
954 bh = page_buffers(page);
955 if (bh->b_size == size) {
956 end_block = init_page_buffers(page, bdev,
957 (sector_t)index << sizebits,
958 size);
959 goto done;
960 }
961 if (!try_to_free_buffers(page))
962 goto failed;
963 }
964
965
966
967
968 bh = alloc_page_buffers(page, size, true);
969
970
971
972
973
974
975 spin_lock(&inode->i_mapping->private_lock);
976 link_dev_buffers(page, bh);
977 end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
978 size);
979 spin_unlock(&inode->i_mapping->private_lock);
980 done:
981 ret = (block < end_block) ? 1 : -ENXIO;
982 failed:
983 unlock_page(page);
984 put_page(page);
985 return ret;
986 }
987
988
989
990
991
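/*
 * grow_buffers() creates (or finds) the page in the block device's page cache
 * that covers the requested block and attaches correctly sized buffers to it;
 * __getblk_slow() loops between this and __find_get_block() until the buffer
 * exists.
 */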
992 static int
993 grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
994 {
995 pgoff_t index;
996 int sizebits;
997
998 sizebits = -1;
999 do {
1000 sizebits++;
1001 } while ((size << sizebits) < PAGE_SIZE);
1002
1003 index = block >> sizebits;
1004
1005
1006
1007
1008
1009 if (unlikely(index != block >> sizebits)) {
1010 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1011 "device %pg\n",
1012 __func__, (unsigned long long)block,
1013 bdev);
1014 return -EIO;
1015 }
1016
1017
1018 return grow_dev_page(bdev, block, index, size, sizebits, gfp);
1019 }
1020
1021 static struct buffer_head *
1022 __getblk_slow(struct block_device *bdev, sector_t block,
1023 unsigned size, gfp_t gfp)
1024 {
1025
1026 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1027 (size < 512 || size > PAGE_SIZE))) {
1028 printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1029 size);
1030 printk(KERN_ERR "logical block size: %d\n",
1031 bdev_logical_block_size(bdev));
1032
1033 dump_stack();
1034 return NULL;
1035 }
1036
1037 for (;;) {
1038 struct buffer_head *bh;
1039 int ret;
1040
1041 bh = __find_get_block(bdev, block, size);
1042 if (bh)
1043 return bh;
1044
1045 ret = grow_buffers(bdev, block, size, gfp);
1046 if (ret < 0)
1047 return NULL;
1048 }
1049 }
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
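/*
 * mark_buffer_dirty() sets the buffer's dirty bit and, if it was not already
 * set, dirties the containing page, tags it dirty in the mapping's i_pages
 * xarray via __set_page_dirty(), and marks the owning inode I_DIRTY_PAGES.
 */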
1086 void mark_buffer_dirty(struct buffer_head *bh)
1087 {
1088 WARN_ON_ONCE(!buffer_uptodate(bh));
1089
1090 trace_block_dirty_buffer(bh);
1091
1092
1093
1094
1095
1096
1097
1098 if (buffer_dirty(bh)) {
1099 smp_mb();
1100 if (buffer_dirty(bh))
1101 return;
1102 }
1103
1104 if (!test_set_buffer_dirty(bh)) {
1105 struct page *page = bh->b_page;
1106 struct address_space *mapping = NULL;
1107
1108 lock_page_memcg(page);
1109 if (!TestSetPageDirty(page)) {
1110 mapping = page_mapping(page);
1111 if (mapping)
1112 __set_page_dirty(page, mapping, 0);
1113 }
1114 unlock_page_memcg(page);
1115 if (mapping)
1116 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1117 }
1118 }
1119 EXPORT_SYMBOL(mark_buffer_dirty);
1120
1121 void mark_buffer_write_io_error(struct buffer_head *bh)
1122 {
1123 set_buffer_write_io_error(bh);
1124
1125 if (bh->b_page && bh->b_page->mapping)
1126 mapping_set_error(bh->b_page->mapping, -EIO);
1127 if (bh->b_assoc_map)
1128 mapping_set_error(bh->b_assoc_map, -EIO);
1129 }
1130 EXPORT_SYMBOL(mark_buffer_write_io_error);
1131
1132
1133
1134
1135
1136
1137
1138
1139 void __brelse(struct buffer_head * buf)
1140 {
1141 if (atomic_read(&buf->b_count)) {
1142 put_bh(buf);
1143 return;
1144 }
1145 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1146 }
1147 EXPORT_SYMBOL(__brelse);
1148
1149
1150
1151
1152
1153 void __bforget(struct buffer_head *bh)
1154 {
1155 clear_buffer_dirty(bh);
1156 if (bh->b_assoc_map) {
1157 struct address_space *buffer_mapping = bh->b_page->mapping;
1158
1159 spin_lock(&buffer_mapping->private_lock);
1160 list_del_init(&bh->b_assoc_buffers);
1161 bh->b_assoc_map = NULL;
1162 spin_unlock(&buffer_mapping->private_lock);
1163 }
1164 __brelse(bh);
1165 }
1166 EXPORT_SYMBOL(__bforget);
1167
1168 static struct buffer_head *__bread_slow(struct buffer_head *bh)
1169 {
1170 lock_buffer(bh);
1171 if (buffer_uptodate(bh)) {
1172 unlock_buffer(bh);
1173 return bh;
1174 } else {
1175 get_bh(bh);
1176 bh->b_end_io = end_buffer_read_sync;
1177 submit_bh(REQ_OP_READ, 0, bh);
1178 wait_on_buffer(bh);
1179 if (buffer_uptodate(bh))
1180 return bh;
1181 }
1182 brelse(bh);
1183 return NULL;
1184 }
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
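/*
 * Per-CPU cache of the BH_LRU_SIZE most recently used buffer heads.
 * __find_get_block() consults it via lookup_bh_lru() before doing the
 * page-cache lookup, and bh_lru_install() inserts new entries, evicting the
 * oldest one.
 */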
1200 #define BH_LRU_SIZE 16
1201
1202 struct bh_lru {
1203 struct buffer_head *bhs[BH_LRU_SIZE];
1204 };
1205
1206 static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1207
1208 #ifdef CONFIG_SMP
1209 #define bh_lru_lock() local_irq_disable()
1210 #define bh_lru_unlock() local_irq_enable()
1211 #else
1212 #define bh_lru_lock() preempt_disable()
1213 #define bh_lru_unlock() preempt_enable()
1214 #endif
1215
1216 static inline void check_irqs_on(void)
1217 {
1218 #ifdef irqs_disabled
1219 BUG_ON(irqs_disabled());
1220 #endif
1221 }
1222
1223
1224
1225
1226
1227
1228 static void bh_lru_install(struct buffer_head *bh)
1229 {
1230 struct buffer_head *evictee = bh;
1231 struct bh_lru *b;
1232 int i;
1233
1234 check_irqs_on();
1235 bh_lru_lock();
1236
1237 b = this_cpu_ptr(&bh_lrus);
1238 for (i = 0; i < BH_LRU_SIZE; i++) {
1239 swap(evictee, b->bhs[i]);
1240 if (evictee == bh) {
1241 bh_lru_unlock();
1242 return;
1243 }
1244 }
1245
1246 get_bh(bh);
1247 bh_lru_unlock();
1248 brelse(evictee);
1249 }
1250
1251
1252
1253
1254 static struct buffer_head *
1255 lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1256 {
1257 struct buffer_head *ret = NULL;
1258 unsigned int i;
1259
1260 check_irqs_on();
1261 bh_lru_lock();
1262 for (i = 0; i < BH_LRU_SIZE; i++) {
1263 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1264
1265 if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
1266 bh->b_size == size) {
1267 if (i) {
1268 while (i) {
1269 __this_cpu_write(bh_lrus.bhs[i],
1270 __this_cpu_read(bh_lrus.bhs[i - 1]));
1271 i--;
1272 }
1273 __this_cpu_write(bh_lrus.bhs[0], bh);
1274 }
1275 get_bh(bh);
1276 ret = bh;
1277 break;
1278 }
1279 }
1280 bh_lru_unlock();
1281 return ret;
1282 }
1283
1284
1285
1286
1287
1288
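/*
 * __find_get_block() checks the per-CPU LRU first and falls back to the block
 * device's page cache; a reference is taken on any buffer returned.
 */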
1289 struct buffer_head *
1290 __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1291 {
1292 struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1293
1294 if (bh == NULL) {
1295
1296 bh = __find_get_block_slow(bdev, block);
1297 if (bh)
1298 bh_lru_install(bh);
1299 } else
1300 touch_buffer(bh);
1301
1302 return bh;
1303 }
1304 EXPORT_SYMBOL(__find_get_block);
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314 struct buffer_head *
1315 __getblk_gfp(struct block_device *bdev, sector_t block,
1316 unsigned size, gfp_t gfp)
1317 {
1318 struct buffer_head *bh = __find_get_block(bdev, block, size);
1319
1320 might_sleep();
1321 if (bh == NULL)
1322 bh = __getblk_slow(bdev, block, size, gfp);
1323 return bh;
1324 }
1325 EXPORT_SYMBOL(__getblk_gfp);
1326
1327
1328
1329
1330 void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1331 {
1332 struct buffer_head *bh = __getblk(bdev, block, size);
1333 if (likely(bh)) {
1334 ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
1335 brelse(bh);
1336 }
1337 }
1338 EXPORT_SYMBOL(__breadahead);
1339
1340 void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size,
1341 gfp_t gfp)
1342 {
1343 struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1344 if (likely(bh)) {
1345 ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
1346 brelse(bh);
1347 }
1348 }
1349 EXPORT_SYMBOL(__breadahead_gfp);
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363 struct buffer_head *
1364 __bread_gfp(struct block_device *bdev, sector_t block,
1365 unsigned size, gfp_t gfp)
1366 {
1367 struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1368
1369 if (likely(bh) && !buffer_uptodate(bh))
1370 bh = __bread_slow(bh);
1371 return bh;
1372 }
1373 EXPORT_SYMBOL(__bread_gfp);
1374
1375
1376
1377
1378
1379
1380 static void invalidate_bh_lru(void *arg)
1381 {
1382 struct bh_lru *b = &get_cpu_var(bh_lrus);
1383 int i;
1384
1385 for (i = 0; i < BH_LRU_SIZE; i++) {
1386 brelse(b->bhs[i]);
1387 b->bhs[i] = NULL;
1388 }
1389 put_cpu_var(bh_lrus);
1390 }
1391
1392 static bool has_bh_in_lru(int cpu, void *dummy)
1393 {
1394 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1395 int i;
1396
1397 for (i = 0; i < BH_LRU_SIZE; i++) {
1398 if (b->bhs[i])
1399 return 1;
1400 }
1401
1402 return 0;
1403 }
1404
1405 void invalidate_bh_lrus(void)
1406 {
1407 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1408 }
1409 EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1410
1411 void set_bh_page(struct buffer_head *bh,
1412 struct page *page, unsigned long offset)
1413 {
1414 bh->b_page = page;
1415 BUG_ON(offset >= PAGE_SIZE);
1416 if (PageHighMem(page))
1417
1418
1419
1420 bh->b_data = (char *)(0 + offset);
1421 else
1422 bh->b_data = page_address(page) + offset;
1423 }
1424 EXPORT_SYMBOL(set_bh_page);
1425
1426
1427
1428
1429
1430
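/*
 * Buffer state bits cleared by discard_buffer() when block_invalidatepage()
 * throws a buffer away; the dirty bit is cleared separately under the buffer
 * lock.
 */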
1431 #define BUFFER_FLAGS_DISCARD \
1432 (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
1433 1 << BH_Delay | 1 << BH_Unwritten)
1434
1435 static void discard_buffer(struct buffer_head * bh)
1436 {
1437 unsigned long b_state, b_state_old;
1438
1439 lock_buffer(bh);
1440 clear_buffer_dirty(bh);
1441 bh->b_bdev = NULL;
1442 b_state = bh->b_state;
1443 for (;;) {
1444 b_state_old = cmpxchg(&bh->b_state, b_state,
1445 (b_state & ~BUFFER_FLAGS_DISCARD));
1446 if (b_state_old == b_state)
1447 break;
1448 b_state = b_state_old;
1449 }
1450 unlock_buffer(bh);
1451 }
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469 void block_invalidatepage(struct page *page, unsigned int offset,
1470 unsigned int length)
1471 {
1472 struct buffer_head *head, *bh, *next;
1473 unsigned int curr_off = 0;
1474 unsigned int stop = length + offset;
1475
1476 BUG_ON(!PageLocked(page));
1477 if (!page_has_buffers(page))
1478 goto out;
1479
1480
1481
1482
1483 BUG_ON(stop > PAGE_SIZE || stop < length);
1484
1485 head = page_buffers(page);
1486 bh = head;
1487 do {
1488 unsigned int next_off = curr_off + bh->b_size;
1489 next = bh->b_this_page;
1490
1491
1492
1493
1494 if (next_off > stop)
1495 goto out;
1496
1497
1498
1499
1500 if (offset <= curr_off)
1501 discard_buffer(bh);
1502 curr_off = next_off;
1503 bh = next;
1504 } while (bh != head);
1505
1506
1507
1508
1509
1510
1511 if (length == PAGE_SIZE)
1512 try_to_release_page(page, 0);
1513 out:
1514 return;
1515 }
1516 EXPORT_SYMBOL(block_invalidatepage);
1517
1518
1519
1520
1521
1522
1523
1524 void create_empty_buffers(struct page *page,
1525 unsigned long blocksize, unsigned long b_state)
1526 {
1527 struct buffer_head *bh, *head, *tail;
1528
1529 head = alloc_page_buffers(page, blocksize, true);
1530 bh = head;
1531 do {
1532 bh->b_state |= b_state;
1533 tail = bh;
1534 bh = bh->b_this_page;
1535 } while (bh);
1536 tail->b_this_page = head;
1537
1538 spin_lock(&page->mapping->private_lock);
1539 if (PageUptodate(page) || PageDirty(page)) {
1540 bh = head;
1541 do {
1542 if (PageDirty(page))
1543 set_buffer_dirty(bh);
1544 if (PageUptodate(page))
1545 set_buffer_uptodate(bh);
1546 bh = bh->b_this_page;
1547 } while (bh != head);
1548 }
1549 attach_page_buffers(page, head);
1550 spin_unlock(&page->mapping->private_lock);
1551 }
1552 EXPORT_SYMBOL(create_empty_buffers);
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574 void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
1575 {
1576 struct inode *bd_inode = bdev->bd_inode;
1577 struct address_space *bd_mapping = bd_inode->i_mapping;
1578 struct pagevec pvec;
1579 pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
1580 pgoff_t end;
1581 int i, count;
1582 struct buffer_head *bh;
1583 struct buffer_head *head;
1584
1585 end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
1586 pagevec_init(&pvec);
1587 while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
1588 count = pagevec_count(&pvec);
1589 for (i = 0; i < count; i++) {
1590 struct page *page = pvec.pages[i];
1591
1592 if (!page_has_buffers(page))
1593 continue;
1594
1595
1596
1597
1598
1599 lock_page(page);
1600
1601 if (!page_has_buffers(page))
1602 goto unlock_page;
1603 head = page_buffers(page);
1604 bh = head;
1605 do {
1606 if (!buffer_mapped(bh) || (bh->b_blocknr < block))
1607 goto next;
1608 if (bh->b_blocknr >= block + len)
1609 break;
1610 clear_buffer_dirty(bh);
1611 wait_on_buffer(bh);
1612 clear_buffer_req(bh);
1613 next:
1614 bh = bh->b_this_page;
1615 } while (bh != head);
1616 unlock_page:
1617 unlock_page(page);
1618 }
1619 pagevec_release(&pvec);
1620 cond_resched();
1621
1622 if (index > end || !index)
1623 break;
1624 }
1625 }
1626 EXPORT_SYMBOL(clean_bdev_aliases);
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636 static inline int block_size_bits(unsigned int blocksize)
1637 {
1638 return ilog2(blocksize);
1639 }
1640
1641 static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1642 {
1643 BUG_ON(!PageLocked(page));
1644
1645 if (!page_has_buffers(page))
1646 create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
1647 b_state);
1648 return page_buffers(page);
1649 }
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
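/*
 * __block_write_full_page() writes out the dirty, mapped buffers of one
 * locked page.  get_block() maps any dirty delayed/unmapped buffers first,
 * buffers beyond i_size are simply cleaned, and the page is unlocked once all
 * I/O has been submitted (or immediately if nothing needed writing).
 */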
1680 int __block_write_full_page(struct inode *inode, struct page *page,
1681 get_block_t *get_block, struct writeback_control *wbc,
1682 bh_end_io_t *handler)
1683 {
1684 int err;
1685 sector_t block;
1686 sector_t last_block;
1687 struct buffer_head *bh, *head;
1688 unsigned int blocksize, bbits;
1689 int nr_underway = 0;
1690 int write_flags = wbc_to_write_flags(wbc);
1691
1692 head = create_page_buffers(page, inode,
1693 (1 << BH_Dirty)|(1 << BH_Uptodate));
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705 bh = head;
1706 blocksize = bh->b_size;
1707 bbits = block_size_bits(blocksize);
1708
1709 block = (sector_t)page->index << (PAGE_SHIFT - bbits);
1710 last_block = (i_size_read(inode) - 1) >> bbits;
1711
1712
1713
1714
1715
1716 do {
1717 if (block > last_block) {
1718
1719
1720
1721
1722
1723
1724
1725
1726 clear_buffer_dirty(bh);
1727 set_buffer_uptodate(bh);
1728 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1729 buffer_dirty(bh)) {
1730 WARN_ON(bh->b_size != blocksize);
1731 err = get_block(inode, block, bh, 1);
1732 if (err)
1733 goto recover;
1734 clear_buffer_delay(bh);
1735 if (buffer_new(bh)) {
1736
1737 clear_buffer_new(bh);
1738 clean_bdev_bh_alias(bh);
1739 }
1740 }
1741 bh = bh->b_this_page;
1742 block++;
1743 } while (bh != head);
1744
1745 do {
1746 if (!buffer_mapped(bh))
1747 continue;
1748
1749
1750
1751
1752
1753
1754
1755 if (wbc->sync_mode != WB_SYNC_NONE) {
1756 lock_buffer(bh);
1757 } else if (!trylock_buffer(bh)) {
1758 redirty_page_for_writepage(wbc, page);
1759 continue;
1760 }
1761 if (test_clear_buffer_dirty(bh)) {
1762 mark_buffer_async_write_endio(bh, handler);
1763 } else {
1764 unlock_buffer(bh);
1765 }
1766 } while ((bh = bh->b_this_page) != head);
1767
1768
1769
1770
1771
1772 BUG_ON(PageWriteback(page));
1773 set_page_writeback(page);
1774
1775 do {
1776 struct buffer_head *next = bh->b_this_page;
1777 if (buffer_async_write(bh)) {
1778 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
1779 inode->i_write_hint, wbc);
1780 nr_underway++;
1781 }
1782 bh = next;
1783 } while (bh != head);
1784 unlock_page(page);
1785
1786 err = 0;
1787 done:
1788 if (nr_underway == 0) {
1789
1790
1791
1792
1793
1794 end_page_writeback(page);
1795
1796
1797
1798
1799
1800 }
1801 return err;
1802
1803 recover:
1804
1805
1806
1807
1808
1809
1810 bh = head;
1811
1812 do {
1813 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1814 !buffer_delay(bh)) {
1815 lock_buffer(bh);
1816 mark_buffer_async_write_endio(bh, handler);
1817 } else {
1818
1819
1820
1821
1822 clear_buffer_dirty(bh);
1823 }
1824 } while ((bh = bh->b_this_page) != head);
1825 SetPageError(page);
1826 BUG_ON(PageWriteback(page));
1827 mapping_set_error(page->mapping, err);
1828 set_page_writeback(page);
1829 do {
1830 struct buffer_head *next = bh->b_this_page;
1831 if (buffer_async_write(bh)) {
1832 clear_buffer_dirty(bh);
1833 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
1834 inode->i_write_hint, wbc);
1835 nr_underway++;
1836 }
1837 bh = next;
1838 } while (bh != head);
1839 unlock_page(page);
1840 goto done;
1841 }
1842 EXPORT_SYMBOL(__block_write_full_page);
1843
1844
1845
1846
1847
1848
1849 void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1850 {
1851 unsigned int block_start, block_end;
1852 struct buffer_head *head, *bh;
1853
1854 BUG_ON(!PageLocked(page));
1855 if (!page_has_buffers(page))
1856 return;
1857
1858 bh = head = page_buffers(page);
1859 block_start = 0;
1860 do {
1861 block_end = block_start + bh->b_size;
1862
1863 if (buffer_new(bh)) {
1864 if (block_end > from && block_start < to) {
1865 if (!PageUptodate(page)) {
1866 unsigned start, size;
1867
1868 start = max(from, block_start);
1869 size = min(to, block_end) - start;
1870
1871 zero_user(page, start, size);
1872 set_buffer_uptodate(bh);
1873 }
1874
1875 clear_buffer_new(bh);
1876 mark_buffer_dirty(bh);
1877 }
1878 }
1879
1880 block_start = block_end;
1881 bh = bh->b_this_page;
1882 } while (bh != head);
1883 }
1884 EXPORT_SYMBOL(page_zero_new_buffers);
1885
1886 static void
1887 iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
1888 struct iomap *iomap)
1889 {
1890 loff_t offset = block << inode->i_blkbits;
1891
1892 bh->b_bdev = iomap->bdev;
1893
1894
1895
1896
1897
1898
1899
1900 BUG_ON(offset >= iomap->offset + iomap->length);
1901
1902 switch (iomap->type) {
1903 case IOMAP_HOLE:
1904
1905
1906
1907
1908
1909 if (!buffer_uptodate(bh) ||
1910 (offset >= i_size_read(inode)))
1911 set_buffer_new(bh);
1912 break;
1913 case IOMAP_DELALLOC:
1914 if (!buffer_uptodate(bh) ||
1915 (offset >= i_size_read(inode)))
1916 set_buffer_new(bh);
1917 set_buffer_uptodate(bh);
1918 set_buffer_mapped(bh);
1919 set_buffer_delay(bh);
1920 break;
1921 case IOMAP_UNWRITTEN:
1922
1923
1924
1925
1926
1927 set_buffer_new(bh);
1928 set_buffer_unwritten(bh);
1929 /* fall through */
1930 case IOMAP_MAPPED:
1931 if ((iomap->flags & IOMAP_F_NEW) ||
1932 offset >= i_size_read(inode))
1933 set_buffer_new(bh);
1934 bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
1935 inode->i_blkbits;
1936 set_buffer_mapped(bh);
1937 break;
1938 }
1939 }
1940
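/*
 * __block_write_begin_int() prepares the byte range [pos, pos + len) of a
 * locked page for writing: it maps the affected buffers (via get_block() or
 * the supplied iomap), zeroes the parts of newly allocated buffers that lie
 * outside the range, and reads in any buffer that is only partially
 * overwritten, waiting for those reads before returning.
 */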
1941 int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
1942 get_block_t *get_block, struct iomap *iomap)
1943 {
1944 unsigned from = pos & (PAGE_SIZE - 1);
1945 unsigned to = from + len;
1946 struct inode *inode = page->mapping->host;
1947 unsigned block_start, block_end;
1948 sector_t block;
1949 int err = 0;
1950 unsigned blocksize, bbits;
1951 struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1952
1953 BUG_ON(!PageLocked(page));
1954 BUG_ON(from > PAGE_SIZE);
1955 BUG_ON(to > PAGE_SIZE);
1956 BUG_ON(from > to);
1957
1958 head = create_page_buffers(page, inode, 0);
1959 blocksize = head->b_size;
1960 bbits = block_size_bits(blocksize);
1961
1962 block = (sector_t)page->index << (PAGE_SHIFT - bbits);
1963
1964 for(bh = head, block_start = 0; bh != head || !block_start;
1965 block++, block_start=block_end, bh = bh->b_this_page) {
1966 block_end = block_start + blocksize;
1967 if (block_end <= from || block_start >= to) {
1968 if (PageUptodate(page)) {
1969 if (!buffer_uptodate(bh))
1970 set_buffer_uptodate(bh);
1971 }
1972 continue;
1973 }
1974 if (buffer_new(bh))
1975 clear_buffer_new(bh);
1976 if (!buffer_mapped(bh)) {
1977 WARN_ON(bh->b_size != blocksize);
1978 if (get_block) {
1979 err = get_block(inode, block, bh, 1);
1980 if (err)
1981 break;
1982 } else {
1983 iomap_to_bh(inode, block, bh, iomap);
1984 }
1985
1986 if (buffer_new(bh)) {
1987 clean_bdev_bh_alias(bh);
1988 if (PageUptodate(page)) {
1989 clear_buffer_new(bh);
1990 set_buffer_uptodate(bh);
1991 mark_buffer_dirty(bh);
1992 continue;
1993 }
1994 if (block_end > to || block_start < from)
1995 zero_user_segments(page,
1996 to, block_end,
1997 block_start, from);
1998 continue;
1999 }
2000 }
2001 if (PageUptodate(page)) {
2002 if (!buffer_uptodate(bh))
2003 set_buffer_uptodate(bh);
2004 continue;
2005 }
2006 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
2007 !buffer_unwritten(bh) &&
2008 (block_start < from || block_end > to)) {
2009 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
2010 *wait_bh++=bh;
2011 }
2012 }
2013
2014
2015
2016 while(wait_bh > wait) {
2017 wait_on_buffer(*--wait_bh);
2018 if (!buffer_uptodate(*wait_bh))
2019 err = -EIO;
2020 }
2021 if (unlikely(err))
2022 page_zero_new_buffers(page, from, to);
2023 return err;
2024 }
2025
2026 int __block_write_begin(struct page *page, loff_t pos, unsigned len,
2027 get_block_t *get_block)
2028 {
2029 return __block_write_begin_int(page, pos, len, get_block, NULL);
2030 }
2031 EXPORT_SYMBOL(__block_write_begin);
2032
2033 static int __block_commit_write(struct inode *inode, struct page *page,
2034 unsigned from, unsigned to)
2035 {
2036 unsigned block_start, block_end;
2037 int partial = 0;
2038 unsigned blocksize;
2039 struct buffer_head *bh, *head;
2040
2041 bh = head = page_buffers(page);
2042 blocksize = bh->b_size;
2043
2044 block_start = 0;
2045 do {
2046 block_end = block_start + blocksize;
2047 if (block_end <= from || block_start >= to) {
2048 if (!buffer_uptodate(bh))
2049 partial = 1;
2050 } else {
2051 set_buffer_uptodate(bh);
2052 mark_buffer_dirty(bh);
2053 }
2054 clear_buffer_new(bh);
2055
2056 block_start = block_end;
2057 bh = bh->b_this_page;
2058 } while (bh != head);
2059
2060
2061
2062
2063
2064
2065
2066 if (!partial)
2067 SetPageUptodate(page);
2068 return 0;
2069 }
2070
2071
2072
2073
2074
2075
2076
2077 int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2078 unsigned flags, struct page **pagep, get_block_t *get_block)
2079 {
2080 pgoff_t index = pos >> PAGE_SHIFT;
2081 struct page *page;
2082 int status;
2083
2084 page = grab_cache_page_write_begin(mapping, index, flags);
2085 if (!page)
2086 return -ENOMEM;
2087
2088 status = __block_write_begin(page, pos, len, get_block);
2089 if (unlikely(status)) {
2090 unlock_page(page);
2091 put_page(page);
2092 page = NULL;
2093 }
2094
2095 *pagep = page;
2096 return status;
2097 }
2098 EXPORT_SYMBOL(block_write_begin);
2099
2100 int block_write_end(struct file *file, struct address_space *mapping,
2101 loff_t pos, unsigned len, unsigned copied,
2102 struct page *page, void *fsdata)
2103 {
2104 struct inode *inode = mapping->host;
2105 unsigned start;
2106
2107 start = pos & (PAGE_SIZE - 1);
2108
2109 if (unlikely(copied < len)) {
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122 if (!PageUptodate(page))
2123 copied = 0;
2124
2125 page_zero_new_buffers(page, start+copied, start+len);
2126 }
2127 flush_dcache_page(page);
2128
2129
2130 __block_commit_write(inode, page, start, start+copied);
2131
2132 return copied;
2133 }
2134 EXPORT_SYMBOL(block_write_end);
2135
2136 int generic_write_end(struct file *file, struct address_space *mapping,
2137 loff_t pos, unsigned len, unsigned copied,
2138 struct page *page, void *fsdata)
2139 {
2140 struct inode *inode = mapping->host;
2141 loff_t old_size = inode->i_size;
2142 bool i_size_changed = false;
2143
2144 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2145
2146
2147
2148
2149
2150
2151
2152
2153 if (pos + copied > inode->i_size) {
2154 i_size_write(inode, pos + copied);
2155 i_size_changed = true;
2156 }
2157
2158 unlock_page(page);
2159 put_page(page);
2160
2161 if (old_size < pos)
2162 pagecache_isize_extended(inode, old_size, pos);
2163
2164
2165
2166
2167
2168
2169 if (i_size_changed)
2170 mark_inode_dirty(inode);
2171 return copied;
2172 }
2173 EXPORT_SYMBOL(generic_write_end);
2174
2175
2176
2177
2178
2179
2180
2181
2182 int block_is_partially_uptodate(struct page *page, unsigned long from,
2183 unsigned long count)
2184 {
2185 unsigned block_start, block_end, blocksize;
2186 unsigned to;
2187 struct buffer_head *bh, *head;
2188 int ret = 1;
2189
2190 if (!page_has_buffers(page))
2191 return 0;
2192
2193 head = page_buffers(page);
2194 blocksize = head->b_size;
2195 to = min_t(unsigned, PAGE_SIZE - from, count);
2196 to = from + to;
2197 if (from < blocksize && to > PAGE_SIZE - blocksize)
2198 return 0;
2199
2200 bh = head;
2201 block_start = 0;
2202 do {
2203 block_end = block_start + blocksize;
2204 if (block_end > from && block_start < to) {
2205 if (!buffer_uptodate(bh)) {
2206 ret = 0;
2207 break;
2208 }
2209 if (block_end >= to)
2210 break;
2211 }
2212 block_start = block_end;
2213 bh = bh->b_this_page;
2214 } while (bh != head);
2215
2216 return ret;
2217 }
2218 EXPORT_SYMBOL(block_is_partially_uptodate);
2219
2220
2221
2222
2223
2224
2225
2226
2227 int block_read_full_page(struct page *page, get_block_t *get_block)
2228 {
2229 struct inode *inode = page->mapping->host;
2230 sector_t iblock, lblock;
2231 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2232 unsigned int blocksize, bbits;
2233 int nr, i;
2234 int fully_mapped = 1;
2235
2236 head = create_page_buffers(page, inode, 0);
2237 blocksize = head->b_size;
2238 bbits = block_size_bits(blocksize);
2239
2240 iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
2241 lblock = (i_size_read(inode)+blocksize-1) >> bbits;
2242 bh = head;
2243 nr = 0;
2244 i = 0;
2245
2246 do {
2247 if (buffer_uptodate(bh))
2248 continue;
2249
2250 if (!buffer_mapped(bh)) {
2251 int err = 0;
2252
2253 fully_mapped = 0;
2254 if (iblock < lblock) {
2255 WARN_ON(bh->b_size != blocksize);
2256 err = get_block(inode, iblock, bh, 0);
2257 if (err)
2258 SetPageError(page);
2259 }
2260 if (!buffer_mapped(bh)) {
2261 zero_user(page, i * blocksize, blocksize);
2262 if (!err)
2263 set_buffer_uptodate(bh);
2264 continue;
2265 }
2266
2267
2268
2269
2270 if (buffer_uptodate(bh))
2271 continue;
2272 }
2273 arr[nr++] = bh;
2274 } while (i++, iblock++, (bh = bh->b_this_page) != head);
2275
2276 if (fully_mapped)
2277 SetPageMappedToDisk(page);
2278
2279 if (!nr) {
2280
2281
2282
2283
2284 if (!PageError(page))
2285 SetPageUptodate(page);
2286 unlock_page(page);
2287 return 0;
2288 }
2289
2290
2291 for (i = 0; i < nr; i++) {
2292 bh = arr[i];
2293 lock_buffer(bh);
2294 mark_buffer_async_read(bh);
2295 }
2296
2297
2298
2299
2300
2301
2302 for (i = 0; i < nr; i++) {
2303 bh = arr[i];
2304 if (buffer_uptodate(bh))
2305 end_buffer_async_read(bh, 1);
2306 else
2307 submit_bh(REQ_OP_READ, 0, bh);
2308 }
2309 return 0;
2310 }
2311 EXPORT_SYMBOL(block_read_full_page);
2312
2313
2314
2315
2316
2317 int generic_cont_expand_simple(struct inode *inode, loff_t size)
2318 {
2319 struct address_space *mapping = inode->i_mapping;
2320 struct page *page;
2321 void *fsdata;
2322 int err;
2323
2324 err = inode_newsize_ok(inode, size);
2325 if (err)
2326 goto out;
2327
2328 err = pagecache_write_begin(NULL, mapping, size, 0,
2329 AOP_FLAG_CONT_EXPAND, &page, &fsdata);
2330 if (err)
2331 goto out;
2332
2333 err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2334 BUG_ON(err > 0);
2335
2336 out:
2337 return err;
2338 }
2339 EXPORT_SYMBOL(generic_cont_expand_simple);
2340
2341 static int cont_expand_zero(struct file *file, struct address_space *mapping,
2342 loff_t pos, loff_t *bytes)
2343 {
2344 struct inode *inode = mapping->host;
2345 unsigned int blocksize = i_blocksize(inode);
2346 struct page *page;
2347 void *fsdata;
2348 pgoff_t index, curidx;
2349 loff_t curpos;
2350 unsigned zerofrom, offset, len;
2351 int err = 0;
2352
2353 index = pos >> PAGE_SHIFT;
2354 offset = pos & ~PAGE_MASK;
2355
2356 while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
2357 zerofrom = curpos & ~PAGE_MASK;
2358 if (zerofrom & (blocksize-1)) {
2359 *bytes |= (blocksize-1);
2360 (*bytes)++;
2361 }
2362 len = PAGE_SIZE - zerofrom;
2363
2364 err = pagecache_write_begin(file, mapping, curpos, len, 0,
2365 &page, &fsdata);
2366 if (err)
2367 goto out;
2368 zero_user(page, zerofrom, len);
2369 err = pagecache_write_end(file, mapping, curpos, len, len,
2370 page, fsdata);
2371 if (err < 0)
2372 goto out;
2373 BUG_ON(err != len);
2374 err = 0;
2375
2376 balance_dirty_pages_ratelimited(mapping);
2377
2378 if (fatal_signal_pending(current)) {
2379 err = -EINTR;
2380 goto out;
2381 }
2382 }
2383
2384
2385 if (index == curidx) {
2386 zerofrom = curpos & ~PAGE_MASK;
2387
2388 if (offset <= zerofrom) {
2389 goto out;
2390 }
2391 if (zerofrom & (blocksize-1)) {
2392 *bytes |= (blocksize-1);
2393 (*bytes)++;
2394 }
2395 len = offset - zerofrom;
2396
2397 err = pagecache_write_begin(file, mapping, curpos, len, 0,
2398 &page, &fsdata);
2399 if (err)
2400 goto out;
2401 zero_user(page, zerofrom, len);
2402 err = pagecache_write_end(file, mapping, curpos, len, len,
2403 page, fsdata);
2404 if (err < 0)
2405 goto out;
2406 BUG_ON(err != len);
2407 err = 0;
2408 }
2409 out:
2410 return err;
2411 }
2412
2413
2414
2415
2416
2417 int cont_write_begin(struct file *file, struct address_space *mapping,
2418 loff_t pos, unsigned len, unsigned flags,
2419 struct page **pagep, void **fsdata,
2420 get_block_t *get_block, loff_t *bytes)
2421 {
2422 struct inode *inode = mapping->host;
2423 unsigned int blocksize = i_blocksize(inode);
2424 unsigned int zerofrom;
2425 int err;
2426
2427 err = cont_expand_zero(file, mapping, pos, bytes);
2428 if (err)
2429 return err;
2430
2431 zerofrom = *bytes & ~PAGE_MASK;
2432 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2433 *bytes |= (blocksize-1);
2434 (*bytes)++;
2435 }
2436
2437 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2438 }
2439 EXPORT_SYMBOL(cont_write_begin);
2440
2441 int block_commit_write(struct page *page, unsigned from, unsigned to)
2442 {
2443 struct inode *inode = page->mapping->host;
2444 __block_commit_write(inode,page,from,to);
2445 return 0;
2446 }
2447 EXPORT_SYMBOL(block_commit_write);
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467 int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2468 get_block_t get_block)
2469 {
2470 struct page *page = vmf->page;
2471 struct inode *inode = file_inode(vma->vm_file);
2472 unsigned long end;
2473 loff_t size;
2474 int ret;
2475
2476 lock_page(page);
2477 size = i_size_read(inode);
2478 if ((page->mapping != inode->i_mapping) ||
2479 (page_offset(page) > size)) {
2480
2481 ret = -EFAULT;
2482 goto out_unlock;
2483 }
2484
2485
2486 if (((page->index + 1) << PAGE_SHIFT) > size)
2487 end = size & ~PAGE_MASK;
2488 else
2489 end = PAGE_SIZE;
2490
2491 ret = __block_write_begin(page, 0, end, get_block);
2492 if (!ret)
2493 ret = block_commit_write(page, 0, end);
2494
2495 if (unlikely(ret < 0))
2496 goto out_unlock;
2497 set_page_dirty(page);
2498 wait_for_stable_page(page);
2499 return 0;
2500 out_unlock:
2501 unlock_page(page);
2502 return ret;
2503 }
2504 EXPORT_SYMBOL(block_page_mkwrite);
2505
2506
2507
2508
2509
2510
2511 static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2512 {
2513 __end_buffer_read_notouch(bh, uptodate);
2514 }
2515
2516
2517
2518
2519
2520
2521 static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2522 {
2523 struct buffer_head *bh;
2524
2525 BUG_ON(!PageLocked(page));
2526
2527 spin_lock(&page->mapping->private_lock);
2528 bh = head;
2529 do {
2530 if (PageDirty(page))
2531 set_buffer_dirty(bh);
2532 if (!bh->b_this_page)
2533 bh->b_this_page = head;
2534 bh = bh->b_this_page;
2535 } while (bh != head);
2536 attach_page_buffers(page, head);
2537 spin_unlock(&page->mapping->private_lock);
2538 }
2539
2540
2541
2542
2543
2544
2545 int nobh_write_begin(struct address_space *mapping,
2546 loff_t pos, unsigned len, unsigned flags,
2547 struct page **pagep, void **fsdata,
2548 get_block_t *get_block)
2549 {
2550 struct inode *inode = mapping->host;
2551 const unsigned blkbits = inode->i_blkbits;
2552 const unsigned blocksize = 1 << blkbits;
2553 struct buffer_head *head, *bh;
2554 struct page *page;
2555 pgoff_t index;
2556 unsigned from, to;
2557 unsigned block_in_page;
2558 unsigned block_start, block_end;
2559 sector_t block_in_file;
2560 int nr_reads = 0;
2561 int ret = 0;
2562 int is_mapped_to_disk = 1;
2563
2564 index = pos >> PAGE_SHIFT;
2565 from = pos & (PAGE_SIZE - 1);
2566 to = from + len;
2567
2568 page = grab_cache_page_write_begin(mapping, index, flags);
2569 if (!page)
2570 return -ENOMEM;
2571 *pagep = page;
2572 *fsdata = NULL;
2573
2574 if (page_has_buffers(page)) {
2575 ret = __block_write_begin(page, pos, len, get_block);
2576 if (unlikely(ret))
2577 goto out_release;
2578 return ret;
2579 }
2580
2581 if (PageMappedToDisk(page))
2582 return 0;
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593 head = alloc_page_buffers(page, blocksize, false);
2594 if (!head) {
2595 ret = -ENOMEM;
2596 goto out_release;
2597 }
2598
2599 block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
2600
2601
2602
2603
2604
2605
2606 for (block_start = 0, block_in_page = 0, bh = head;
2607 block_start < PAGE_SIZE;
2608 block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2609 int create;
2610
2611 block_end = block_start + blocksize;
2612 bh->b_state = 0;
2613 create = 1;
2614 if (block_start >= to)
2615 create = 0;
2616 ret = get_block(inode, block_in_file + block_in_page,
2617 bh, create);
2618 if (ret)
2619 goto failed;
2620 if (!buffer_mapped(bh))
2621 is_mapped_to_disk = 0;
2622 if (buffer_new(bh))
2623 clean_bdev_bh_alias(bh);
2624 if (PageUptodate(page)) {
2625 set_buffer_uptodate(bh);
2626 continue;
2627 }
2628 if (buffer_new(bh) || !buffer_mapped(bh)) {
2629 zero_user_segments(page, block_start, from,
2630 to, block_end);
2631 continue;
2632 }
2633 if (buffer_uptodate(bh))
2634 continue;
2635 if (block_start < from || block_end > to) {
2636 lock_buffer(bh);
2637 bh->b_end_io = end_buffer_read_nobh;
2638 submit_bh(REQ_OP_READ, 0, bh);
2639 nr_reads++;
2640 }
2641 }
2642
2643 if (nr_reads) {
2644
2645
2646
2647
2648
2649 for (bh = head; bh; bh = bh->b_this_page) {
2650 wait_on_buffer(bh);
2651 if (!buffer_uptodate(bh))
2652 ret = -EIO;
2653 }
2654 if (ret)
2655 goto failed;
2656 }
2657
2658 if (is_mapped_to_disk)
2659 SetPageMappedToDisk(page);
2660
2661 *fsdata = head;
2662
2663 return 0;
2664
2665 failed:
2666 BUG_ON(!ret);
2667
2668
2669
2670
2671
2672
2673
2674 attach_nobh_buffers(page, head);
2675 page_zero_new_buffers(page, from, to);
2676
2677 out_release:
2678 unlock_page(page);
2679 put_page(page);
2680 *pagep = NULL;
2681
2682 return ret;
2683 }
2684 EXPORT_SYMBOL(nobh_write_begin);
2685
2686 int nobh_write_end(struct file *file, struct address_space *mapping,
2687 loff_t pos, unsigned len, unsigned copied,
2688 struct page *page, void *fsdata)
2689 {
2690 struct inode *inode = page->mapping->host;
2691 struct buffer_head *head = fsdata;
2692 struct buffer_head *bh;
2693 BUG_ON(fsdata != NULL && page_has_buffers(page));
2694
2695 if (unlikely(copied < len) && head)
2696 attach_nobh_buffers(page, head);
2697 if (page_has_buffers(page))
2698 return generic_write_end(file, mapping, pos, len,
2699 copied, page, fsdata);
2700
2701 SetPageUptodate(page);
2702 set_page_dirty(page);
2703 if (pos+copied > inode->i_size) {
2704 i_size_write(inode, pos+copied);
2705 mark_inode_dirty(inode);
2706 }
2707
2708 unlock_page(page);
2709 put_page(page);
2710
2711 while (head) {
2712 bh = head;
2713 head = head->b_this_page;
2714 free_buffer_head(bh);
2715 }
2716
2717 return copied;
2718 }
2719 EXPORT_SYMBOL(nobh_write_end);
2720
2721
2722
2723
2724
2725
2726 int nobh_writepage(struct page *page, get_block_t *get_block,
2727 struct writeback_control *wbc)
2728 {
2729 struct inode * const inode = page->mapping->host;
2730 loff_t i_size = i_size_read(inode);
2731 const pgoff_t end_index = i_size >> PAGE_SHIFT;
2732 unsigned offset;
2733 int ret;
2734
2735
2736 if (page->index < end_index)
2737 goto out;
2738
2739
2740 offset = i_size & (PAGE_SIZE-1);
2741 if (page->index >= end_index+1 || !offset) {
2742
2743
2744
2745
2746
2747 #if 0
2748
2749 if (page->mapping->a_ops->invalidatepage)
2750 page->mapping->a_ops->invalidatepage(page, offset);
2751 #endif
2752 unlock_page(page);
2753 return 0;
2754 }
2755
2756
2757
2758
2759
2760
2761
2762
2763 zero_user_segment(page, offset, PAGE_SIZE);
2764 out:
2765 ret = mpage_writepage(page, get_block, wbc);
2766 if (ret == -EAGAIN)
2767 ret = __block_write_full_page(inode, page, get_block, wbc,
2768 end_buffer_async_write);
2769 return ret;
2770 }
2771 EXPORT_SYMBOL(nobh_writepage);
2772
2773 int nobh_truncate_page(struct address_space *mapping,
2774 loff_t from, get_block_t *get_block)
2775 {
2776 pgoff_t index = from >> PAGE_SHIFT;
2777 unsigned offset = from & (PAGE_SIZE-1);
2778 unsigned blocksize;
2779 sector_t iblock;
2780 unsigned length, pos;
2781 struct inode *inode = mapping->host;
2782 struct page *page;
2783 struct buffer_head map_bh;
2784 int err;
2785
2786 blocksize = i_blocksize(inode);
2787 length = offset & (blocksize - 1);
2788
2789 /* Block boundary? Nothing to do */
2790 if (!length)
2791 return 0;
2792
2793 length = blocksize - length;
2794 iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
2795
2796 page = grab_cache_page(mapping, index);
2797 err = -ENOMEM;
2798 if (!page)
2799 goto out;
2800
2801 if (page_has_buffers(page)) {
2802 has_buffers:
2803 unlock_page(page);
2804 put_page(page);
2805 return block_truncate_page(mapping, from, get_block);
2806 }
2807
2808 /* Find the buffer that contains "offset" */
2809 pos = blocksize;
2810 while (offset >= pos) {
2811 iblock++;
2812 pos += blocksize;
2813 }
2814
2815 map_bh.b_size = blocksize;
2816 map_bh.b_state = 0;
2817 err = get_block(inode, iblock, &map_bh, 0);
2818 if (err)
2819 goto unlock;
2820 /* Unmapped? It's a hole - nothing to do */
2821 if (!buffer_mapped(&map_bh))
2822 goto unlock;
2823
2824 /* Ok, it's mapped. Make sure the page is up to date before zeroing */
2825 if (!PageUptodate(page)) {
2826 err = mapping->a_ops->readpage(NULL, page);
2827 if (err) {
2828 put_page(page);
2829 goto out;
2830 }
2831 lock_page(page);
2832 if (!PageUptodate(page)) {
2833 err = -EIO;
2834 goto unlock;
2835 }
2836 if (page_has_buffers(page))
2837 goto has_buffers;
2838 }
2839 zero_user(page, offset, length);
2840 set_page_dirty(page);
2841 err = 0;
2842
2843 unlock:
2844 unlock_page(page);
2845 put_page(page);
2846 out:
2847 return err;
2848 }
2849 EXPORT_SYMBOL(nobh_truncate_page);
2850
2851 int block_truncate_page(struct address_space *mapping,
2852 loff_t from, get_block_t *get_block)
2853 {
2854 pgoff_t index = from >> PAGE_SHIFT;
2855 unsigned offset = from & (PAGE_SIZE-1);
2856 unsigned blocksize;
2857 sector_t iblock;
2858 unsigned length, pos;
2859 struct inode *inode = mapping->host;
2860 struct page *page;
2861 struct buffer_head *bh;
2862 int err;
2863
2864 blocksize = i_blocksize(inode);
2865 length = offset & (blocksize - 1);
2866
2867 /* Block boundary? Nothing to do */
2868 if (!length)
2869 return 0;
2870
2871 length = blocksize - length;
2872 iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
2873
2874 page = grab_cache_page(mapping, index);
2875 err = -ENOMEM;
2876 if (!page)
2877 goto out;
2878
2879 if (!page_has_buffers(page))
2880 create_empty_buffers(page, blocksize, 0);
2881
2882 /* Find the buffer that contains "offset" */
2883 bh = page_buffers(page);
2884 pos = blocksize;
2885 while (offset >= pos) {
2886 bh = bh->b_this_page;
2887 iblock++;
2888 pos += blocksize;
2889 }
2890
2891 err = 0;
2892 if (!buffer_mapped(bh)) {
2893 WARN_ON(bh->b_size != blocksize);
2894 err = get_block(inode, iblock, bh, 0);
2895 if (err)
2896 goto unlock;
2897
2898 if (!buffer_mapped(bh))
2899 goto unlock;
2900 }
2901
2902 /* Ok, it's mapped. Make sure it's up-to-date */
2903 if (PageUptodate(page))
2904 set_buffer_uptodate(bh);
2905
2906 if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
2907 err = -EIO;
2908 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
2909 wait_on_buffer(bh);
2910 /* Uhhuh. Read error. Complain and punt. */
2911 if (!buffer_uptodate(bh))
2912 goto unlock;
2913 }
2914
2915 zero_user(page, offset, length);
2916 mark_buffer_dirty(bh);
2917 err = 0;
2918
2919 unlock:
2920 unlock_page(page);
2921 put_page(page);
2922 out:
2923 return err;
2924 }
2925 EXPORT_SYMBOL(block_truncate_page);
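/*
 * Editor's illustrative sketch (not part of the original file): how a
 * filesystem's truncate path might use block_truncate_page() to zero the
 * partial block at the new EOF before shrinking i_size.  myfs_get_block,
 * myfs_truncate_blocks and myfs_setsize are hypothetical helpers.
 */
#include <linux/mm.h>
#include <linux/buffer_head.h>

static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create);
static void myfs_truncate_blocks(struct inode *inode, loff_t newsize);

static int myfs_setsize(struct inode *inode, loff_t newsize)
{
	int err;

	/* Zero the tail of the (possibly partial) last block at newsize. */
	err = block_truncate_page(inode->i_mapping, newsize, myfs_get_block);
	if (err)
		return err;

	truncate_setsize(inode, newsize);	/* update i_size, drop stale pages */
	myfs_truncate_blocks(inode, newsize);	/* free the on-disk blocks */
	return 0;
}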
2926
2927 /*
2928 * The generic ->writepage function for buffer-backed address_spaces
2929 */
2930 int block_write_full_page(struct page *page, get_block_t *get_block,
2931 struct writeback_control *wbc)
2932 {
2933 struct inode * const inode = page->mapping->host;
2934 loff_t i_size = i_size_read(inode);
2935 const pgoff_t end_index = i_size >> PAGE_SHIFT;
2936 unsigned offset;
2937
2938 /* Is the page fully inside i_size? */
2939 if (page->index < end_index)
2940 return __block_write_full_page(inode, page, get_block, wbc,
2941 end_buffer_async_write);
2942
2943 /* Is the page fully outside i_size? (truncate in progress) */
2944 offset = i_size & (PAGE_SIZE-1);
2945 if (page->index >= end_index+1 || !offset) {
2946 /*
2947 * The page may have dirty, unmapped buffers.  For example,
2948 * they may have been added in ext3_writepage().  Make them
2949 * freeable here, so the page does not leak.
2950 */
2951 do_invalidatepage(page, 0, PAGE_SIZE);
2952 unlock_page(page);
2953 return 0;
2954 }
2955
2956 /*
2957 * The page straddles i_size.  It must be zeroed out on each and every
2958 * writepage invocation because it may be mmapped.  "A file is mapped
2959 * in multiples of the page size.  For a file that is not a multiple of
2960 * the page size, the remaining memory is zeroed when mapped, and
2961 * writes to that region are not written out to the file."
2962 */
2963 zero_user_segment(page, offset, PAGE_SIZE);
2964 return __block_write_full_page(inode, page, get_block, wbc,
2965 end_buffer_async_write);
2966 }
2967 EXPORT_SYMBOL(block_write_full_page);
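/*
 * Editor's illustrative sketch (not part of the original file): the classic
 * buffer-backed ->writepage, which simply forwards to block_write_full_page()
 * with the filesystem's get_block routine; an alternative to the
 * nobh_writepage() wiring sketched earlier.  myfs_get_block and
 * myfs_writepage are hypothetical names.
 */
#include <linux/buffer_head.h>
#include <linux/writeback.h>

static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create);

static int myfs_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, myfs_get_block, wbc);
}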
2968
2969 sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2970 get_block_t *get_block)
2971 {
2972 struct inode *inode = mapping->host;
2973 struct buffer_head tmp = {
2974 .b_size = i_blocksize(inode),
2975 };
2976
2977 get_block(inode, block, &tmp, 0);
2978 return tmp.b_blocknr;
2979 }
2980 EXPORT_SYMBOL(generic_block_bmap);
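/*
 * Editor's illustrative sketch (not part of the original file): a ->bmap
 * method built on generic_block_bmap(), which is what services the FIBMAP
 * ioctl on such filesystems.  myfs_get_block and myfs_bmap are hypothetical
 * names.
 */
#include <linux/buffer_head.h>

static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create);

static sector_t myfs_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, myfs_get_block);
}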
2981
2982 static void end_bio_bh_io_sync(struct bio *bio)
2983 {
2984 struct buffer_head *bh = bio->bi_private;
2985
2986 if (unlikely(bio_flagged(bio, BIO_QUIET)))
2987 set_bit(BH_Quiet, &bh->b_state);
2988
2989 bh->b_end_io(bh, !bio->bi_status);
2990 bio_put(bio);
2991 }
2992
2993 /*
2994 * This allows us to do I/O even on the odd last sectors of a device,
2995 * even if the block size is some multiple of the physical sector size:
2996 * the bio is clamped so that it does not extend past the end of the
2997 * partition (or of the whole disk when there is no partition).
2998 *
2999 * Truly out-of-range accesses (starting beyond the end of the device)
3000 * are left alone and will turn into actual I/O errors from the block
3001 * layer; only the "I/O straddles the final sector" case is handled
3002 * here, by truncating the bio with bio_truncate(), which also zeroes
3003 * the discarded tail of a read.
3004 */
3005 void guard_bio_eod(struct bio *bio)
3006 {
3007 sector_t maxsector;
3008 struct hd_struct *part;
3009
3010 rcu_read_lock();
3011 part = __disk_get_part(bio->bi_disk, bio->bi_partno);
3012 if (part)
3013 maxsector = part_nr_sects_read(part);
3014 else
3015 maxsector = get_capacity(bio->bi_disk);
3016 rcu_read_unlock();
3017
3018 if (!maxsector)
3019 return;
3020
3021 /*
3022 * If the *whole* IO is past the end of the device,
3023 * let it through, and the IO layer will turn it into
3024 * an EIO.
3025 */
3026 if (unlikely(bio->bi_iter.bi_sector >= maxsector))
3027 return;
3028
3029 maxsector -= bio->bi_iter.bi_sector;
3030 if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
3031 return;
3032
3033 bio_truncate(bio, maxsector << 9);
3034 }
3035
3036 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3037 enum rw_hint write_hint, struct writeback_control *wbc)
3038 {
3039 struct bio *bio;
3040
3041 BUG_ON(!buffer_locked(bh));
3042 BUG_ON(!buffer_mapped(bh));
3043 BUG_ON(!bh->b_end_io);
3044 BUG_ON(buffer_delay(bh));
3045 BUG_ON(buffer_unwritten(bh));
3046
3047 /*
3048 * When re-writing a buffer that has seen I/O before, clear any stale write-error flag.
3049 */
3050 if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
3051 clear_buffer_write_io_error(bh);
3052
3053 /*
3054 * from here on down, it's all bio -- do the initial mapping,
3055 * submit_bio -> generic_make_request may further map this bio around
3056 */
3057 bio = bio_alloc(GFP_NOIO, 1);
3058
3059 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
3060 bio_set_dev(bio, bh->b_bdev);
3061 bio->bi_write_hint = write_hint;
3062
3063 bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
3064 BUG_ON(bio->bi_iter.bi_size != bh->b_size);
3065
3066 bio->bi_end_io = end_bio_bh_io_sync;
3067 bio->bi_private = bh;
3068
3069 if (buffer_meta(bh))
3070 op_flags |= REQ_META;
3071 if (buffer_prio(bh))
3072 op_flags |= REQ_PRIO;
3073 bio_set_op_attrs(bio, op, op_flags);
3074
3075 /* Take care of bh's that straddle the end of the device */
3076 guard_bio_eod(bio);
3077
3078 if (wbc) {
3079 wbc_init_bio(wbc, bio);
3080 wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
3081 }
3082
3083 submit_bio(bio);
3084 return 0;
3085 }
3086
3087 int submit_bh(int op, int op_flags, struct buffer_head *bh)
3088 {
3089 return submit_bh_wbc(op, op_flags, bh, 0, NULL);
3090 }
3091 EXPORT_SYMBOL(submit_bh);
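/*
 * Editor's illustrative sketch (not part of the original file): reading a
 * single mapped buffer synchronously with submit_bh(), following the same
 * pattern as __bread_slow() earlier in this file.  myfs_read_bh_sync is a
 * hypothetical helper and assumes the caller holds a reference on bh.
 */
#include <linux/buffer_head.h>
#include <linux/blkdev.h>

static int myfs_read_bh_sync(struct buffer_head *bh)
{
	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}
	get_bh(bh);			/* reference dropped by end_buffer_read_sync() */
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(REQ_OP_READ, 0, bh);
	wait_on_buffer(bh);		/* the end_io handler unlocks the buffer */
	return buffer_uptodate(bh) ? 0 : -EIO;
}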
3092
3093 /**
3094 * ll_rw_block: low-level access to block devices (DEPRECATED)
3095 * @op: whether to %READ or %WRITE
3096 * @op_flags: req_flag_bits
3097 * @nr: number of &struct buffer_heads in the array
3098 * @bhs: array of pointers to &struct buffer_head
3099 *
3100 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
3101 * requests an I/O operation on them, either a %REQ_OP_READ or a
3102 * %REQ_OP_WRITE.  @op_flags contains flags modifying the detailed I/O
3103 * behavior, most notably %REQ_RAHEAD.
3104 *
3105 * This function drops any buffer that it cannot get a lock on (with the
3106 * BH_Lock state bit), any buffer that appears to be clean when doing a
3107 * write request, and any buffer that appears to be up-to-date when doing
3108 * a read request.  Further it marks as clean buffers that are processed
3109 * for writing (the buffer cache won't assume that they are actually clean
3110 * until the buffer gets unlocked).
3111 *
3112 * ll_rw_block sets b_end_io to a simple completion handler that marks
3113 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
3114 * any waiters.
3115 *
3116 * All of the buffers must be for the same device, and must also be a
3117 * multiple of the current approved size for the device.
3118 */
3119 void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[])
3120 {
3121 int i;
3122
3123 for (i = 0; i < nr; i++) {
3124 struct buffer_head *bh = bhs[i];
3125
3126 if (!trylock_buffer(bh))
3127 continue;
3128 if (op == WRITE) {
3129 if (test_clear_buffer_dirty(bh)) {
3130 bh->b_end_io = end_buffer_write_sync;
3131 get_bh(bh);
3132 submit_bh(op, op_flags, bh);
3133 continue;
3134 }
3135 } else {
3136 if (!buffer_uptodate(bh)) {
3137 bh->b_end_io = end_buffer_read_sync;
3138 get_bh(bh);
3139 submit_bh(op, op_flags, bh);
3140 continue;
3141 }
3142 }
3143 unlock_buffer(bh);
3144 }
3145 }
3146 EXPORT_SYMBOL(ll_rw_block);
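/*
 * Editor's illustrative sketch (not part of the original file): starting
 * read-ahead on a batch of already-mapped buffers with ll_rw_block().  The
 * I/O is not waited for here; a caller that needs the data must later
 * wait_on_buffer() each bh and re-check buffer_uptodate().
 * myfs_readahead_bhs is a hypothetical helper.
 */
#include <linux/buffer_head.h>
#include <linux/blkdev.h>

static void myfs_readahead_bhs(struct buffer_head *bhs[], int nr)
{
	ll_rw_block(REQ_OP_READ, REQ_RAHEAD, nr, bhs);
}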
3147
3148 void write_dirty_buffer(struct buffer_head *bh, int op_flags)
3149 {
3150 lock_buffer(bh);
3151 if (!test_clear_buffer_dirty(bh)) {
3152 unlock_buffer(bh);
3153 return;
3154 }
3155 bh->b_end_io = end_buffer_write_sync;
3156 get_bh(bh);
3157 submit_bh(REQ_OP_WRITE, op_flags, bh);
3158 }
3159 EXPORT_SYMBOL(write_dirty_buffer);
3160
3161 /*
3162 * For a data-integrity writeout, we need to wait upon any in-progress I/O
3163 * and then start new I/O and then wait upon it.  The caller must have a ref
3164 * on the buffer_head.
3165 */
3166 int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
3167 {
3168 int ret = 0;
3169
3170 WARN_ON(atomic_read(&bh->b_count) < 1);
3171 lock_buffer(bh);
3172 if (test_clear_buffer_dirty(bh)) {
3173 get_bh(bh);
3174 bh->b_end_io = end_buffer_write_sync;
3175 ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
3176 wait_on_buffer(bh);
3177 if (!ret && !buffer_uptodate(bh))
3178 ret = -EIO;
3179 } else {
3180 unlock_buffer(bh);
3181 }
3182 return ret;
3183 }
3184 EXPORT_SYMBOL(__sync_dirty_buffer);
3185
3186 int sync_dirty_buffer(struct buffer_head *bh)
3187 {
3188 return __sync_dirty_buffer(bh, REQ_SYNC);
3189 }
3190 EXPORT_SYMBOL(sync_dirty_buffer);
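/*
 * Editor's illustrative sketch (not part of the original file): dirtying and
 * synchronously flushing one metadata buffer, e.g. an on-disk superblock
 * copy, the way several filesystems use sync_dirty_buffer().
 * myfs_flush_sb_bh is a hypothetical helper; the caller is assumed to hold a
 * reference on bh.
 */
#include <linux/buffer_head.h>

static int myfs_flush_sb_bh(struct buffer_head *bh)
{
	mark_buffer_dirty(bh);
	return sync_dirty_buffer(bh);	/* returns -EIO if the write failed */
}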
3191
3192 /*
3193 * try_to_free_buffers() checks if all the buffers on this particular page
3194 * are unused, and releases them if so.
3195 *
3196 * Exclusion against try_to_free_buffers may be obtained by either
3197 * locking the page or by holding its mapping's private_lock.
3198 *
3199 * If the page is dirty but all the buffers are clean then we need to
3200 * be sure to mark the page clean as well.  This is because the page
3201 * may be against a block device, and a later reattachment of buffers
3202 * to a dirty page will set *all* buffers dirty.  Which would corrupt
3203 * filesystem data on the same device.
3204 *
3205 * The same applies to regular filesystem pages: if all the buffers are
3206 * clean then we set the page clean and proceed.  To do that, we require
3207 * total exclusion from __set_page_dirty_buffers().  That is obtained with
3208 * private_lock.
3209 *
3210 * try_to_free_buffers() is non-blocking.
3211 */
3212 static inline int buffer_busy(struct buffer_head *bh)
3213 {
3214 return atomic_read(&bh->b_count) |
3215 (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
3216 }
3217
3218 static int
3219 drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
3220 {
3221 struct buffer_head *head = page_buffers(page);
3222 struct buffer_head *bh;
3223
3224 bh = head;
3225 do {
3226 if (buffer_busy(bh))
3227 goto failed;
3228 bh = bh->b_this_page;
3229 } while (bh != head);
3230
3231 do {
3232 struct buffer_head *next = bh->b_this_page;
3233
3234 if (bh->b_assoc_map)
3235 __remove_assoc_queue(bh);
3236 bh = next;
3237 } while (bh != head);
3238 *buffers_to_free = head;
3239 __clear_page_buffers(page);
3240 return 1;
3241 failed:
3242 return 0;
3243 }
3244
3245 int try_to_free_buffers(struct page *page)
3246 {
3247 struct address_space * const mapping = page->mapping;
3248 struct buffer_head *buffers_to_free = NULL;
3249 int ret = 0;
3250
3251 BUG_ON(!PageLocked(page));
3252 if (PageWriteback(page))
3253 return 0;
3254
3255 if (mapping == NULL) {
3256 ret = drop_buffers(page, &buffers_to_free);
3257 goto out;
3258 }
3259
3260 spin_lock(&mapping->private_lock);
3261 ret = drop_buffers(page, &buffers_to_free);
3262
3263 /*
3264 * If the filesystem writes its buffers by hand (eg ext3)
3265 * then we can have clean buffers against a dirty page.  We
3266 * clean the page here; otherwise the VM will never notice
3267 * that the filesystem did any IO at all.
3268 *
3269 * Also, during truncate, discard_buffer will have marked all
3270 * the page's buffers clean.  We discover that here and clean
3271 * the page also.
3272 *
3273 * private_lock must be held over this entire operation in order
3274 * to synchronise against __set_page_dirty_buffers and prevent the
3275 * dirty bit from being lost.
3276 */
3277 if (ret)
3278 cancel_dirty_page(page);
3279 spin_unlock(&mapping->private_lock);
3280 out:
3281 if (buffers_to_free) {
3282 struct buffer_head *bh = buffers_to_free;
3283
3284 do {
3285 struct buffer_head *next = bh->b_this_page;
3286 free_buffer_head(bh);
3287 bh = next;
3288 } while (bh != buffers_to_free);
3289 }
3290 return ret;
3291 }
3292 EXPORT_SYMBOL(try_to_free_buffers);
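/*
 * Editor's illustrative sketch (not part of the original file): the common
 * ->releasepage pattern, which just asks try_to_free_buffers() to detach
 * clean, unused buffers so the page itself can be freed.  myfs_releasepage is
 * a hypothetical name; the gfp argument is ignored, as in typical users.
 */
#include <linux/buffer_head.h>

static int myfs_releasepage(struct page *page, gfp_t gfp)
{
	return try_to_free_buffers(page);
}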
3293
3294 /*
3295 * There are no bdflush tunables left.  But distributions are
3296 * still running obsolete flush daemons, so we terminate them here.
3297 *
3298 * Use of bdflush() is deprecated and will be removed in a future kernel.
3299 * The `flush-X' kernel threads fully replace bdflush daemons and this call.
3300 */
3301 SYSCALL_DEFINE2(bdflush, int, func, long, data)
3302 {
3303 static int msg_count;
3304
3305 if (!capable(CAP_SYS_ADMIN))
3306 return -EPERM;
3307
3308 if (msg_count < 5) {
3309 msg_count++;
3310 printk(KERN_INFO
3311 "warning: process `%s' used the obsolete bdflush"
3312 " system call\n", current->comm);
3313 printk(KERN_INFO "Fix your initscripts?\n");
3314 }
3315
3316 if (func == 1)
3317 do_exit(0);
3318 return 0;
3319 }
3320
3321 /*
3322 * Buffer-head allocation
3323 */
3324 static struct kmem_cache *bh_cachep __read_mostly;
3325
3326 /*
3327 * Once the number of bh's in the machine exceeds this level, we start
3328 * stripping them in writeback.
3329 */
3330 static unsigned long max_buffer_heads;
3331
3332 int buffer_heads_over_limit;
3333
3334 struct bh_accounting {
3335 int nr;
3336 int ratelimit;
3337 };
3338
3339 static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
3340
3341 static void recalc_bh_state(void)
3342 {
3343 int i;
3344 int tot = 0;
3345
3346 if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
3347 return;
3348 __this_cpu_write(bh_accounting.ratelimit, 0);
3349 for_each_online_cpu(i)
3350 tot += per_cpu(bh_accounting, i).nr;
3351 buffer_heads_over_limit = (tot > max_buffer_heads);
3352 }
3353
3354 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3355 {
3356 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
3357 if (ret) {
3358 INIT_LIST_HEAD(&ret->b_assoc_buffers);
3359 preempt_disable();
3360 __this_cpu_inc(bh_accounting.nr);
3361 recalc_bh_state();
3362 preempt_enable();
3363 }
3364 return ret;
3365 }
3366 EXPORT_SYMBOL(alloc_buffer_head);
3367
3368 void free_buffer_head(struct buffer_head *bh)
3369 {
3370 BUG_ON(!list_empty(&bh->b_assoc_buffers));
3371 kmem_cache_free(bh_cachep, bh);
3372 preempt_disable();
3373 __this_cpu_dec(bh_accounting.nr);
3374 recalc_bh_state();
3375 preempt_enable();
3376 }
3377 EXPORT_SYMBOL(free_buffer_head);
3378
3379 static int buffer_exit_cpu_dead(unsigned int cpu)
3380 {
3381 int i;
3382 struct bh_lru *b = &per_cpu(bh_lrus, cpu);
3383
3384 for (i = 0; i < BH_LRU_SIZE; i++) {
3385 brelse(b->bhs[i]);
3386 b->bhs[i] = NULL;
3387 }
3388 this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
3389 per_cpu(bh_accounting, cpu).nr = 0;
3390 return 0;
3391 }
3392
3393 /**
3394 * bh_uptodate_or_lock - Test whether the buffer is uptodate
3395 * @bh: struct buffer_head
3396 *
3397 * Return true if the buffer is up-to-date and false,
3398 * with the buffer locked, if not.
3399 */
3400 int bh_uptodate_or_lock(struct buffer_head *bh)
3401 {
3402 if (!buffer_uptodate(bh)) {
3403 lock_buffer(bh);
3404 if (!buffer_uptodate(bh))
3405 return 0;
3406 unlock_buffer(bh);
3407 }
3408 return 1;
3409 }
3410 EXPORT_SYMBOL(bh_uptodate_or_lock);
3411
3412 /**
3413 * bh_submit_read - Submit a locked buffer for reading
3414 * @bh: struct buffer_head
3415 *
3416 * Returns zero on success and -EIO on error.
3417 */
3418 int bh_submit_read(struct buffer_head *bh)
3419 {
3420 BUG_ON(!buffer_locked(bh));
3421
3422 if (buffer_uptodate(bh)) {
3423 unlock_buffer(bh);
3424 return 0;
3425 }
3426
3427 get_bh(bh);
3428 bh->b_end_io = end_buffer_read_sync;
3429 submit_bh(REQ_OP_READ, 0, bh);
3430 wait_on_buffer(bh);
3431 if (buffer_uptodate(bh))
3432 return 0;
3433 return -EIO;
3434 }
3435 EXPORT_SYMBOL(bh_submit_read);
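/*
 * Editor's illustrative sketch (not part of the original file): the usual
 * pairing of bh_uptodate_or_lock() with bh_submit_read(), which reads a
 * buffer only when it is not already up to date.  myfs_read_bh_if_needed is
 * a hypothetical helper.
 */
#include <linux/buffer_head.h>

static int myfs_read_bh_if_needed(struct buffer_head *bh)
{
	if (bh_uptodate_or_lock(bh))
		return 0;		/* already up to date, buffer left unlocked */
	return bh_submit_read(bh);	/* buffer is locked: submit the read and wait */
}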
3436
3437 void __init buffer_init(void)
3438 {
3439 unsigned long nrpages;
3440 int ret;
3441
3442 bh_cachep = kmem_cache_create("buffer_head",
3443 sizeof(struct buffer_head), 0,
3444 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
3445 SLAB_MEM_SPREAD),
3446 NULL);
3447
3448 /*
3449 * Limit the bh occupancy to 10% of ZONE_NORMAL
3450 */
3451 nrpages = (nr_free_buffer_pages() * 10) / 100;
3452 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
3453 ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
3454 NULL, buffer_exit_cpu_dead);
3455 WARN_ON(ret < 0);
3456 }