This source file includes the following definitions:
- mb_correct_addr_and_bit
- mb_test_bit
- mb_set_bit
- mb_clear_bit
- mb_test_and_clear_bit
- mb_find_next_zero_bit
- mb_find_next_bit
- mb_find_buddy
- mb_free_blocks_double
- mb_mark_used_double
- mb_cmp_bitmaps
- mb_free_blocks_double
- mb_mark_used_double
- mb_cmp_bitmaps
- __mb_check_buddy
- ext4_mb_mark_free_simple
- mb_set_largest_free_order
- ext4_mb_generate_buddy
- mb_regenerate_buddy
- ext4_mb_init_cache
- ext4_mb_get_buddy_page_lock
- ext4_mb_put_buddy_page_lock
- ext4_mb_init_group
- ext4_mb_load_buddy_gfp
- ext4_mb_load_buddy
- ext4_mb_unload_buddy
- mb_find_order_for_block
- mb_clear_bits
- mb_test_and_clear_bits
- ext4_set_bits
- mb_buddy_adjust_border
- mb_buddy_mark_free
- mb_free_blocks
- mb_find_extent
- mb_mark_used
- ext4_mb_use_best_found
- ext4_mb_check_limits
- ext4_mb_measure_extent
- ext4_mb_try_best_found
- ext4_mb_find_by_goal
- ext4_mb_simple_scan_group
- ext4_mb_complex_scan_group
- ext4_mb_scan_aligned
- ext4_mb_good_group
- ext4_mb_regular_allocator
- ext4_mb_seq_groups_start
- ext4_mb_seq_groups_next
- ext4_mb_seq_groups_show
- ext4_mb_seq_groups_stop
- get_groupinfo_cache
- ext4_mb_alloc_groupinfo
- ext4_mb_add_groupinfo
- ext4_mb_init_backend
- ext4_groupinfo_destroy_slabs
- ext4_groupinfo_create_slab
- ext4_mb_init
- ext4_mb_cleanup_pa
- ext4_mb_release
- ext4_issue_discard
- ext4_free_data_in_buddy
- ext4_process_freed_data
- ext4_init_mballoc
- ext4_exit_mballoc
- ext4_mb_mark_diskspace_used
- ext4_mb_normalize_group_request
- ext4_mb_normalize_request
- ext4_mb_collect_stats
- ext4_discard_allocated_blocks
- ext4_mb_use_inode_pa
- ext4_mb_use_group_pa
- ext4_mb_check_group_pa
- ext4_mb_use_preallocated
- ext4_mb_generate_from_freelist
- ext4_mb_generate_from_pa
- ext4_mb_pa_callback
- ext4_mb_put_pa
- ext4_mb_new_inode_pa
- ext4_mb_new_group_pa
- ext4_mb_new_preallocation
- ext4_mb_release_inode_pa
- ext4_mb_release_group_pa
- ext4_mb_discard_group_preallocations
- ext4_discard_preallocations
- ext4_mb_show_ac
- ext4_mb_show_ac
- ext4_mb_group_or_file
- ext4_mb_initialize_context
- ext4_mb_discard_lg_preallocations
- ext4_mb_add_n_trim
- ext4_mb_release_context
- ext4_mb_discard_preallocations
- ext4_mb_new_blocks
- ext4_try_merge_freed_extent
- ext4_mb_free_metadata
- ext4_free_blocks
- ext4_group_add_blocks
- ext4_trim_extent
- ext4_trim_all_free
- ext4_trim_fs
- ext4_mballoc_query_range
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
4  * Written by Alex Tomas <alex@clusterfs.com>
5  */
6
7 /*
8  * mballoc.c contains the multiblocks allocation routines
9  */
10
11
12 #include "ext4_jbd2.h"
13 #include "mballoc.h"
14 #include <linux/log2.h>
15 #include <linux/module.h>
16 #include <linux/slab.h>
17 #include <linux/nospec.h>
18 #include <linux/backing-dev.h>
19 #include <trace/events/ext4.h>
20
21 #ifdef CONFIG_EXT4_DEBUG
22 ushort ext4_mballoc_debug __read_mostly;
23
24 module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
25 MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
26 #endif
27
28 /* (Lines 28-338: mballoc's long theory-of-operation comment -- request
 * normalization, the buddy cache, inode and group preallocation, locality
 * groups, and the tunables -- was stripped during extraction and is
 * omitted from this listing.) */
339 static struct kmem_cache *ext4_pspace_cachep;
340 static struct kmem_cache *ext4_ac_cachep;
341 static struct kmem_cache *ext4_free_data_cachep;
342
343
344 /* We create slab caches for groupinfo data structures based on the
345  * superblock block size; one per mounted filesystem for each unique
 * s_blocksize_bits. */
346 #define NR_GRPINFO_CACHES 8
347 static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
348
349 static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
350 "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
351 "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
352 "ext4_groupinfo_64k", "ext4_groupinfo_128k"
353 };
354
355 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
356 ext4_group_t group);
357 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
358 ext4_group_t group);
359
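/*
 * The mb_*_bit helpers below operate on bitmaps that may not be
 * unsigned-long aligned.  mb_correct_addr_and_bit() rounds the address
 * down to the previous long boundary and compensates by adding the
 * skipped bytes' bits (8 per byte) to the bit offset.
 */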
360 static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
361 {
362 #if BITS_PER_LONG == 64
363 *bit += ((unsigned long) addr & 7UL) << 3;
364 addr = (void *) ((unsigned long) addr & ~7UL);
365 #elif BITS_PER_LONG == 32
366 *bit += ((unsigned long) addr & 3UL) << 3;
367 addr = (void *) ((unsigned long) addr & ~3UL);
368 #else
369 #error "how many bits you are?!"
370 #endif
371 return addr;
372 }
373
374 static inline int mb_test_bit(int bit, void *addr)
375 {
376 /*
377  * ext4_test_bit on architectures like powerpc needs an
378  * unsigned long aligned address
379  */
380 addr = mb_correct_addr_and_bit(&bit, addr);
381 return ext4_test_bit(bit, addr);
382 }
383
384 static inline void mb_set_bit(int bit, void *addr)
385 {
386 addr = mb_correct_addr_and_bit(&bit, addr);
387 ext4_set_bit(bit, addr);
388 }
389
390 static inline void mb_clear_bit(int bit, void *addr)
391 {
392 addr = mb_correct_addr_and_bit(&bit, addr);
393 ext4_clear_bit(bit, addr);
394 }
395
396 static inline int mb_test_and_clear_bit(int bit, void *addr)
397 {
398 addr = mb_correct_addr_and_bit(&bit, addr);
399 return ext4_test_and_clear_bit(bit, addr);
400 }
401
402 static inline int mb_find_next_zero_bit(void *addr, int max, int start)
403 {
404 int fix = 0, ret, tmpmax;
405 addr = mb_correct_addr_and_bit(&fix, addr);
406 tmpmax = max + fix;
407 start += fix;
408
409 ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
410 if (ret > max)
411 return max;
412 return ret;
413 }
414
415 static inline int mb_find_next_bit(void *addr, int max, int start)
416 {
417 int fix = 0, ret, tmpmax;
418 addr = mb_correct_addr_and_bit(&fix, addr);
419 tmpmax = max + fix;
420 start += fix;
421
422 ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
423 if (ret > max)
424 return max;
425 return ret;
426 }
427
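/*
 * Return the bitmap for the given buddy order (order 0 is the plain
 * block bitmap; higher orders live at precomputed s_mb_offsets inside
 * the buddy block) and the number of valid bits in it via *max.
 */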
428 static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
429 {
430 char *bb;
431
432 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
433 BUG_ON(max == NULL);
434
435 if (order > e4b->bd_blkbits + 1) {
436 *max = 0;
437 return NULL;
438 }
439
440
441 if (order == 0) {
442 *max = 1 << (e4b->bd_blkbits + 3);
443 return e4b->bd_bitmap;
444 }
445
446 bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
447 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
448
449 return bb;
450 }
451
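/*
 * With DOUBLE_CHECK enabled, mballoc keeps a shadow copy of each group's
 * bitmap (bb_bitmap) and verifies every free/allocate operation against
 * it; the stubs below compile the checks away otherwise.
 */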
452 #ifdef DOUBLE_CHECK
453 static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
454 int first, int count)
455 {
456 int i;
457 struct super_block *sb = e4b->bd_sb;
458
459 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
460 return;
461 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
462 for (i = 0; i < count; i++) {
463 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
464 ext4_fsblk_t blocknr;
465
466 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
467 blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
468 ext4_grp_locked_error(sb, e4b->bd_group,
469 inode ? inode->i_ino : 0,
470 blocknr,
471 "freeing block already freed "
472 "(bit %u)",
473 first + i);
474 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
475 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
476 }
477 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
478 }
479 }
480
481 static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
482 {
483 int i;
484
485 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
486 return;
487 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
488 for (i = 0; i < count; i++) {
489 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
490 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
491 }
492 }
493
494 static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
495 {
496 if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
497 unsigned char *b1, *b2;
498 int i;
499 b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
500 b2 = (unsigned char *) bitmap;
501 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
502 if (b1[i] != b2[i]) {
503 ext4_msg(e4b->bd_sb, KERN_ERR,
504 "corruption in group %u "
505 "at byte %u(%u): %x in copy != %x "
506 "on disk/prealloc",
507 e4b->bd_group, i, i * 8, b1[i], b2[i]);
508 BUG();
509 }
510 }
511 }
512 }
513
514 #else
515 static inline void mb_free_blocks_double(struct inode *inode,
516 struct ext4_buddy *e4b, int first, int count)
517 {
518 return;
519 }
520 static inline void mb_mark_used_double(struct ext4_buddy *e4b,
521 int first, int count)
522 {
523 return;
524 }
525 static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
526 {
527 return;
528 }
529 #endif
530
531 #ifdef AGGRESSIVE_CHECK
532
533 #define MB_CHECK_ASSERT(assert) \
534 do { \
535 if (!(assert)) { \
536 printk(KERN_EMERG \
537 "Assertion failure in %s() at %s:%d: \"%s\"\n", \
538 function, file, line, # assert); \
539 BUG(); \
540 } \
541 } while (0)
542
543 static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
544 const char *function, int line)
545 {
546 struct super_block *sb = e4b->bd_sb;
547 int order = e4b->bd_blkbits + 1;
548 int max;
549 int max2;
550 int i;
551 int j;
552 int k;
553 int count;
554 struct ext4_group_info *grp;
555 int fragments = 0;
556 int fstart;
557 struct list_head *cur;
558 void *buddy;
559 void *buddy2;
560
561 {
562 static int mb_check_counter;
563 if (mb_check_counter++ % 100 != 0)
564 return 0;
565 }
566
567 while (order > 1) {
568 buddy = mb_find_buddy(e4b, order, &max);
569 MB_CHECK_ASSERT(buddy);
570 buddy2 = mb_find_buddy(e4b, order - 1, &max2);
571 MB_CHECK_ASSERT(buddy2);
572 MB_CHECK_ASSERT(buddy != buddy2);
573 MB_CHECK_ASSERT(max * 2 == max2);
574
575 count = 0;
576 for (i = 0; i < max; i++) {
577
578 if (mb_test_bit(i, buddy)) {
579
580 if (!mb_test_bit(i << 1, buddy2)) {
581 MB_CHECK_ASSERT(
582 mb_test_bit((i<<1)+1, buddy2));
583 } else if (!mb_test_bit((i << 1) + 1, buddy2)) {
584 MB_CHECK_ASSERT(
585 mb_test_bit(i << 1, buddy2));
586 }
587 continue;
588 }
589
590
591 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
592 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
593
594 for (j = 0; j < (1 << order); j++) {
595 k = (i * (1 << order)) + j;
596 MB_CHECK_ASSERT(
597 !mb_test_bit(k, e4b->bd_bitmap));
598 }
599 count++;
600 }
601 MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
602 order--;
603 }
604
605 fstart = -1;
606 buddy = mb_find_buddy(e4b, 0, &max);
607 for (i = 0; i < max; i++) {
608 if (!mb_test_bit(i, buddy)) {
609 MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
610 if (fstart == -1) {
611 fragments++;
612 fstart = i;
613 }
614 continue;
615 }
616 fstart = -1;
617
618 for (j = 0; j < e4b->bd_blkbits + 1; j++) {
619 buddy2 = mb_find_buddy(e4b, j, &max2);
620 k = i >> j;
621 MB_CHECK_ASSERT(k < max2);
622 MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
623 }
624 }
625 MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
626 MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
627
628 grp = ext4_get_group_info(sb, e4b->bd_group);
629 list_for_each(cur, &grp->bb_prealloc_list) {
630 ext4_group_t groupnr;
631 struct ext4_prealloc_space *pa;
632 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
633 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
634 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
635 for (i = 0; i < pa->pa_len; i++)
636 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
637 }
638 return 0;
639 }
640 #undef MB_CHECK_ASSERT
641 #define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
642 __FILE__, __func__, __LINE__)
643 #else
644 #define mb_check_buddy(e4b)
645 #endif
646
647
648
649
650
651
652
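/*
 * Mark the free range [first, first + len) in the buddy bitmaps by
 * splitting it into power-of-two chunks: each chunk's order is bounded
 * by the alignment of `first` (ffs) and by the remaining length (fls),
 * and bb_counters[] is updated per chunk order.
 */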
653 static void ext4_mb_mark_free_simple(struct super_block *sb,
654 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
655 struct ext4_group_info *grp)
656 {
657 struct ext4_sb_info *sbi = EXT4_SB(sb);
658 ext4_grpblk_t min;
659 ext4_grpblk_t max;
660 ext4_grpblk_t chunk;
661 unsigned int border;
662
663 BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
664
665 border = 2 << sb->s_blocksize_bits;
666
667 while (len > 0) {
668
669 max = ffs(first | border) - 1;
670
671
672 min = fls(len) - 1;
673
674 if (max < min)
675 min = max;
676 chunk = 1 << min;
677
678
679 grp->bb_counters[min]++;
680 if (min > 0)
681 mb_clear_bit(first >> min,
682 buddy + sbi->s_mb_offsets[min]);
683
684 len -= chunk;
685 first += chunk;
686 }
687 }
688
689
690
691
692
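/* Cache the highest buddy order that still has a free chunk in this group. */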
693 static void
694 mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
695 {
696 int i;
697 int bits;
698
699 grp->bb_largest_free_order = -1;
700
701 bits = sb->s_blocksize_bits + 1;
702 for (i = bits; i >= 0; i--) {
703 if (grp->bb_counters[i] > 0) {
704 grp->bb_largest_free_order = i;
705 break;
706 }
707 }
708 }
709
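/*
 * Build a group's buddy bitmap and counters from its block bitmap; also
 * cross-checks the free-cluster count against the group descriptor and
 * flags the group corrupt on a mismatch.
 */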
710 static noinline_for_stack
711 void ext4_mb_generate_buddy(struct super_block *sb,
712 void *buddy, void *bitmap, ext4_group_t group)
713 {
714 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
715 struct ext4_sb_info *sbi = EXT4_SB(sb);
716 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
717 ext4_grpblk_t i = 0;
718 ext4_grpblk_t first;
719 ext4_grpblk_t len;
720 unsigned free = 0;
721 unsigned fragments = 0;
722 unsigned long long period = get_cycles();
723
724
725
726 i = mb_find_next_zero_bit(bitmap, max, 0);
727 grp->bb_first_free = i;
728 while (i < max) {
729 fragments++;
730 first = i;
731 i = mb_find_next_bit(bitmap, max, i);
732 len = i - first;
733 free += len;
734 if (len > 1)
735 ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
736 else
737 grp->bb_counters[0]++;
738 if (i < max)
739 i = mb_find_next_zero_bit(bitmap, max, i);
740 }
741 grp->bb_fragments = fragments;
742
743 if (free != grp->bb_free) {
744 ext4_grp_locked_error(sb, group, 0, 0,
745 "block bitmap and bg descriptor "
746 "inconsistent: %u vs %u free clusters",
747 free, grp->bb_free);
748
749
750
751
752 grp->bb_free = free;
753 ext4_mark_group_bitmap_corrupted(sb, group,
754 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
755 }
756 mb_set_largest_free_order(sb, grp);
757
758 clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
759
760 period = get_cycles() - period;
761 spin_lock(&sbi->s_bal_lock);
762 sbi->s_mb_buddies_generated++;
763 sbi->s_mb_generation_time += period;
764 spin_unlock(&sbi->s_bal_lock);
765 }
766
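/*
 * Rebuild the buddy bitmaps and counters from scratch out of the
 * (possibly corrected) block bitmap after an inconsistency was found.
 */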
767 static void mb_regenerate_buddy(struct ext4_buddy *e4b)
768 {
769 int count;
770 int order = 1;
771 void *buddy;
772
773 while ((buddy = mb_find_buddy(e4b, order++, &count))) {
774 ext4_set_bits(buddy, 0, count);
775 }
776 e4b->bd_info->bb_fragments = 0;
777 memset(e4b->bd_info->bb_counters, 0,
778 sizeof(*e4b->bd_info->bb_counters) *
779 (e4b->bd_sb->s_blocksize_bits + 2));
780
781 ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
782 e4b->bd_bitmap, e4b->bd_group);
783 }
784
785 /*
786  * The buddy information is attached to the buddy cache inode for
787  * convenience; the information for each group is loaded via
788  * ext4_mb_load_buddy.  Block bitmap and buddy data are stored in the
789  * inode as:
790  *
791  * {                        page                        }
792  * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
793  *
794  * one block each for bitmap and buddy information, so each group takes
795  * up 2 blocks.  A page can contain blocks_per_page
796  * (PAGE_SIZE / blocksize) blocks, so it holds information for
797  * blocks_per_page/2 groups.
798  */
805 static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
806 {
807 ext4_group_t ngroups;
808 int blocksize;
809 int blocks_per_page;
810 int groups_per_page;
811 int err = 0;
812 int i;
813 ext4_group_t first_group, group;
814 int first_block;
815 struct super_block *sb;
816 struct buffer_head *bhs;
817 struct buffer_head **bh = NULL;
818 struct inode *inode;
819 char *data;
820 char *bitmap;
821 struct ext4_group_info *grinfo;
822
823 mb_debug(1, "init page %lu\n", page->index);
824
825 inode = page->mapping->host;
826 sb = inode->i_sb;
827 ngroups = ext4_get_groups_count(sb);
828 blocksize = i_blocksize(inode);
829 blocks_per_page = PAGE_SIZE / blocksize;
830
831 groups_per_page = blocks_per_page >> 1;
832 if (groups_per_page == 0)
833 groups_per_page = 1;
834
835
836 if (groups_per_page > 1) {
837 i = sizeof(struct buffer_head *) * groups_per_page;
838 bh = kzalloc(i, gfp);
839 if (bh == NULL) {
840 err = -ENOMEM;
841 goto out;
842 }
843 } else
844 bh = &bhs;
845
846 first_group = page->index * blocks_per_page / 2;
847
848
849 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
850 if (group >= ngroups)
851 break;
852
853 grinfo = ext4_get_group_info(sb, group);
854
855
856
857
858
859
860 if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
861 bh[i] = NULL;
862 continue;
863 }
864 bh[i] = ext4_read_block_bitmap_nowait(sb, group);
865 if (IS_ERR(bh[i])) {
866 err = PTR_ERR(bh[i]);
867 bh[i] = NULL;
868 goto out;
869 }
870 mb_debug(1, "read bitmap for group %u\n", group);
871 }
872
873
874 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
875 int err2;
876
877 if (!bh[i])
878 continue;
879 err2 = ext4_wait_block_bitmap(sb, group, bh[i]);
880 if (!err)
881 err = err2;
882 }
883
884 first_block = page->index * blocks_per_page;
885 for (i = 0; i < blocks_per_page; i++) {
886 group = (first_block + i) >> 1;
887 if (group >= ngroups)
888 break;
889
890 if (!bh[group - first_group])
891
892 continue;
893
894 if (!buffer_verified(bh[group - first_group]))
895
896 continue;
897 err = 0;
898
899
900
901
902
903
904
905 data = page_address(page) + (i * blocksize);
906 bitmap = bh[group - first_group]->b_data;
907
908
909
910
911
912 if ((first_block + i) & 1) {
913
914 BUG_ON(incore == NULL);
915 mb_debug(1, "put buddy for group %u in page %lu/%x\n",
916 group, page->index, i * blocksize);
917 trace_ext4_mb_buddy_bitmap_load(sb, group);
918 grinfo = ext4_get_group_info(sb, group);
919 grinfo->bb_fragments = 0;
920 memset(grinfo->bb_counters, 0,
921 sizeof(*grinfo->bb_counters) *
922 (sb->s_blocksize_bits+2));
923
924
925
926 ext4_lock_group(sb, group);
927
928 memset(data, 0xff, blocksize);
929 ext4_mb_generate_buddy(sb, data, incore, group);
930 ext4_unlock_group(sb, group);
931 incore = NULL;
932 } else {
933
934 BUG_ON(incore != NULL);
935 mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
936 group, page->index, i * blocksize);
937 trace_ext4_mb_bitmap_load(sb, group);
938
939
940 ext4_lock_group(sb, group);
941 memcpy(data, bitmap, blocksize);
942
943
944 ext4_mb_generate_from_pa(sb, data, group);
945 ext4_mb_generate_from_freelist(sb, data, group);
946 ext4_unlock_group(sb, group);
947
948
949
950
951 incore = data;
952 }
953 }
954 SetPageUptodate(page);
955
956 out:
957 if (bh) {
958 for (i = 0; i < groups_per_page; i++)
959 brelse(bh[i]);
960 if (bh != &bhs)
961 kfree(bh);
962 }
963 return err;
964 }
965
966
967
968
969
970
971
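/*
 * Lock the bitmap page (and, when bitmap and buddy live on different
 * pages, the buddy page too) so that a parallel ext4_mb_init_group()
 * on the same buddy cache pages cannot run concurrently.
 */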
972 static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
973 ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
974 {
975 struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
976 int block, pnum, poff;
977 int blocks_per_page;
978 struct page *page;
979
980 e4b->bd_buddy_page = NULL;
981 e4b->bd_bitmap_page = NULL;
982
983 blocks_per_page = PAGE_SIZE / sb->s_blocksize;
984
985
986
987
988
989 block = group * 2;
990 pnum = block / blocks_per_page;
991 poff = block % blocks_per_page;
992 page = find_or_create_page(inode->i_mapping, pnum, gfp);
993 if (!page)
994 return -ENOMEM;
995 BUG_ON(page->mapping != inode->i_mapping);
996 e4b->bd_bitmap_page = page;
997 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
998
999 if (blocks_per_page >= 2) {
1000
1001 return 0;
1002 }
1003
1004 block++;
1005 pnum = block / blocks_per_page;
1006 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1007 if (!page)
1008 return -ENOMEM;
1009 BUG_ON(page->mapping != inode->i_mapping);
1010 e4b->bd_buddy_page = page;
1011 return 0;
1012 }
1013
1014 static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
1015 {
1016 if (e4b->bd_bitmap_page) {
1017 unlock_page(e4b->bd_bitmap_page);
1018 put_page(e4b->bd_bitmap_page);
1019 }
1020 if (e4b->bd_buddy_page) {
1021 unlock_page(e4b->bd_buddy_page);
1022 put_page(e4b->bd_buddy_page);
1023 }
1024 }
1025
1026
1027
1028
1029
1030
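/*
 * Initialize the bitmap (and buddy, if present on a separate page)
 * cache pages for one group, re-checking EXT4_MB_GRP_NEED_INIT under
 * the page lock so concurrent initializers do the work only once.
 */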
1031 static noinline_for_stack
1032 int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
1033 {
1034
1035 struct ext4_group_info *this_grp;
1036 struct ext4_buddy e4b;
1037 struct page *page;
1038 int ret = 0;
1039
1040 might_sleep();
1041 mb_debug(1, "init group %u\n", group);
1042 this_grp = ext4_get_group_info(sb, group);
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052 ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
1053 if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
1054
1055
1056
1057
1058 goto err;
1059 }
1060
1061 page = e4b.bd_bitmap_page;
1062 ret = ext4_mb_init_cache(page, NULL, gfp);
1063 if (ret)
1064 goto err;
1065 if (!PageUptodate(page)) {
1066 ret = -EIO;
1067 goto err;
1068 }
1069
1070 if (e4b.bd_buddy_page == NULL) {
1071
1072
1073
1074
1075
1076 ret = 0;
1077 goto err;
1078 }
1079
1080 page = e4b.bd_buddy_page;
1081 ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
1082 if (ret)
1083 goto err;
1084 if (!PageUptodate(page)) {
1085 ret = -EIO;
1086 goto err;
1087 }
1088 err:
1089 ext4_mb_put_buddy_page_lock(&e4b);
1090 return ret;
1091 }
1092
1093
1094
1095
1096
1097
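/*
 * Pin the page-cache pages holding this group's bitmap and buddy data,
 * (re)building them via ext4_mb_init_cache() when absent or stale, and
 * fill in the ext4_buddy descriptor.  Pages are released by
 * ext4_mb_unload_buddy().
 */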
1098 static noinline_for_stack int
1099 ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
1100 struct ext4_buddy *e4b, gfp_t gfp)
1101 {
1102 int blocks_per_page;
1103 int block;
1104 int pnum;
1105 int poff;
1106 struct page *page;
1107 int ret;
1108 struct ext4_group_info *grp;
1109 struct ext4_sb_info *sbi = EXT4_SB(sb);
1110 struct inode *inode = sbi->s_buddy_cache;
1111
1112 might_sleep();
1113 mb_debug(1, "load group %u\n", group);
1114
1115 blocks_per_page = PAGE_SIZE / sb->s_blocksize;
1116 grp = ext4_get_group_info(sb, group);
1117
1118 e4b->bd_blkbits = sb->s_blocksize_bits;
1119 e4b->bd_info = grp;
1120 e4b->bd_sb = sb;
1121 e4b->bd_group = group;
1122 e4b->bd_buddy_page = NULL;
1123 e4b->bd_bitmap_page = NULL;
1124
1125 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1126
1127
1128
1129
1130 ret = ext4_mb_init_group(sb, group, gfp);
1131 if (ret)
1132 return ret;
1133 }
1134
1135
1136
1137
1138
1139
1140 block = group * 2;
1141 pnum = block / blocks_per_page;
1142 poff = block % blocks_per_page;
1143
1144
1145
1146 page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
1147 if (page == NULL || !PageUptodate(page)) {
1148 if (page)
1149
1150
1151
1152
1153
1154
1155
1156
1157 put_page(page);
1158 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1159 if (page) {
1160 BUG_ON(page->mapping != inode->i_mapping);
1161 if (!PageUptodate(page)) {
1162 ret = ext4_mb_init_cache(page, NULL, gfp);
1163 if (ret) {
1164 unlock_page(page);
1165 goto err;
1166 }
1167 mb_cmp_bitmaps(e4b, page_address(page) +
1168 (poff * sb->s_blocksize));
1169 }
1170 unlock_page(page);
1171 }
1172 }
1173 if (page == NULL) {
1174 ret = -ENOMEM;
1175 goto err;
1176 }
1177 if (!PageUptodate(page)) {
1178 ret = -EIO;
1179 goto err;
1180 }
1181
1182
1183 e4b->bd_bitmap_page = page;
1184 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
1185
1186 block++;
1187 pnum = block / blocks_per_page;
1188 poff = block % blocks_per_page;
1189
1190 page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
1191 if (page == NULL || !PageUptodate(page)) {
1192 if (page)
1193 put_page(page);
1194 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1195 if (page) {
1196 BUG_ON(page->mapping != inode->i_mapping);
1197 if (!PageUptodate(page)) {
1198 ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
1199 gfp);
1200 if (ret) {
1201 unlock_page(page);
1202 goto err;
1203 }
1204 }
1205 unlock_page(page);
1206 }
1207 }
1208 if (page == NULL) {
1209 ret = -ENOMEM;
1210 goto err;
1211 }
1212 if (!PageUptodate(page)) {
1213 ret = -EIO;
1214 goto err;
1215 }
1216
1217
1218 e4b->bd_buddy_page = page;
1219 e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
1220
1221 BUG_ON(e4b->bd_bitmap_page == NULL);
1222 BUG_ON(e4b->bd_buddy_page == NULL);
1223
1224 return 0;
1225
1226 err:
1227 if (page)
1228 put_page(page);
1229 if (e4b->bd_bitmap_page)
1230 put_page(e4b->bd_bitmap_page);
1231 if (e4b->bd_buddy_page)
1232 put_page(e4b->bd_buddy_page);
1233 e4b->bd_buddy = NULL;
1234 e4b->bd_bitmap = NULL;
1235 return ret;
1236 }
1237
1238 static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1239 struct ext4_buddy *e4b)
1240 {
1241 return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
1242 }
1243
1244 static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
1245 {
1246 if (e4b->bd_bitmap_page)
1247 put_page(e4b->bd_bitmap_page);
1248 if (e4b->bd_buddy_page)
1249 put_page(e4b->bd_buddy_page);
1250 }
1251
1252
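/*
 * Return the order of the free buddy chunk that contains `block` (the
 * single order whose buddy bitmap shows it as free), or 0 when the
 * block is free only as an order-0 fragment.
 */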
1253 static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1254 {
1255 int order = 1;
1256 int bb_incr = 1 << (e4b->bd_blkbits - 1);
1257 void *bb;
1258
1259 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
1260 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
1261
1262 bb = e4b->bd_buddy;
1263 while (order <= e4b->bd_blkbits + 1) {
1264 block = block >> 1;
1265 if (!mb_test_bit(block, bb)) {
1266
1267 return order;
1268 }
1269 bb += bb_incr;
1270 bb_incr >>= 1;
1271 order++;
1272 }
1273 return 0;
1274 }
1275
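/*
 * Bulk bit helpers: clear/set `len` bits starting at `cur`, taking the
 * fast whole-word path whenever the cursor is 32-bit aligned and at
 * least a full word remains.
 */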
1276 static void mb_clear_bits(void *bm, int cur, int len)
1277 {
1278 __u32 *addr;
1279
1280 len = cur + len;
1281 while (cur < len) {
1282 if ((cur & 31) == 0 && (len - cur) >= 32) {
1283
1284 addr = bm + (cur >> 3);
1285 *addr = 0;
1286 cur += 32;
1287 continue;
1288 }
1289 mb_clear_bit(cur, bm);
1290 cur++;
1291 }
1292 }
1293
1294
1295
1296
1297 static int mb_test_and_clear_bits(void *bm, int cur, int len)
1298 {
1299 __u32 *addr;
1300 int zero_bit = -1;
1301
1302 len = cur + len;
1303 while (cur < len) {
1304 if ((cur & 31) == 0 && (len - cur) >= 32) {
1305
1306 addr = bm + (cur >> 3);
1307 if (*addr != (__u32)(-1) && zero_bit == -1)
1308 zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
1309 *addr = 0;
1310 cur += 32;
1311 continue;
1312 }
1313 if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
1314 zero_bit = cur;
1315 cur++;
1316 }
1317
1318 return zero_bit;
1319 }
1320
1321 void ext4_set_bits(void *bm, int cur, int len)
1322 {
1323 __u32 *addr;
1324
1325 len = cur + len;
1326 while (cur < len) {
1327 if ((cur & 31) == 0 && (len - cur) >= 32) {
1328
1329 addr = bm + (cur >> 3);
1330 *addr = 0xffffffff;
1331 cur += 32;
1332 continue;
1333 }
1334 mb_set_bit(cur, bm);
1335 cur++;
1336 }
1337 }
1338
1339
1340
1341
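/*
 * Adjust one border of a freed range at the current buddy order.  If
 * the neighbouring buddy (*bit + side) is busy, the border chunk
 * becomes a free chunk at this order: clear its bit, move the border
 * inward, and return +1 for bb_counters[order].  If the neighbour is
 * free, the pair merges upward: consume the neighbour (set its bit),
 * widen the range, and return -1.
 */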
1342 static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
1343 {
1344 if (mb_test_bit(*bit + side, bitmap)) {
1345 mb_clear_bit(*bit, bitmap);
1346 (*bit) -= side;
1347 return 1;
1348 }
1349 else {
1350 (*bit) += side;
1351 mb_set_bit(*bit, bitmap);
1352 return -1;
1353 }
1354 }
1355
1356 static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
1357 {
1358 int max;
1359 int order = 1;
1360 void *buddy = mb_find_buddy(e4b, order, &max);
1361
1362 while (buddy) {
1363 void *buddy2;
1364
1365 /*
1366  * Bits [first; last] at this order cover the range being freed.
1367  * Only the borders can be partially mergeable: if `first` is odd,
1368  * its left buddy decides whether the two halves join into an
1369  * order+1 chunk; likewise for an even `last` and its right buddy.
1370  * mb_buddy_adjust_border() resolves each border and returns the
1371  * bb_counters[order] delta.  Everything strictly between the
1372  * borders merges wholesale, so the loop halves first/last and
1373  * moves one order up; the last step flushes the remainder with
1374  * mb_clear_bits().
1375  */
1394 if (first & 1)
1395 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
1396 if (!(last & 1))
1397 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
1398 if (first > last)
1399 break;
1400 order++;
1401
1402 if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
1403 mb_clear_bits(buddy, first, last - first + 1);
1404 e4b->bd_info->bb_counters[order - 1] += last - first + 1;
1405 break;
1406 }
1407 first >>= 1;
1408 last >>= 1;
1409 buddy = buddy2;
1410 }
1411 }
1412
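/*
 * Free `count` clusters starting at `first` in the group's block bitmap
 * and propagate the change through the buddy bitmaps, updating bb_free,
 * bb_fragments, bb_counters and bb_largest_free_order.
 */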
1413 static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1414 int first, int count)
1415 {
1416 int left_is_free = 0;
1417 int right_is_free = 0;
1418 int block;
1419 int last = first + count - 1;
1420 struct super_block *sb = e4b->bd_sb;
1421
1422 if (WARN_ON(count == 0))
1423 return;
1424 BUG_ON(last >= (sb->s_blocksize << 3));
1425 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1426
1427 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
1428 return;
1429
1430 mb_check_buddy(e4b);
1431 mb_free_blocks_double(inode, e4b, first, count);
1432
1433 e4b->bd_info->bb_free += count;
1434 if (first < e4b->bd_info->bb_first_free)
1435 e4b->bd_info->bb_first_free = first;
1436
1437
1438
1439
1440 if (first != 0)
1441 left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
1442 block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
1443 if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
1444 right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
1445
1446 if (unlikely(block != -1)) {
1447 struct ext4_sb_info *sbi = EXT4_SB(sb);
1448 ext4_fsblk_t blocknr;
1449
1450 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1451 blocknr += EXT4_C2B(sbi, block);
1452 ext4_grp_locked_error(sb, e4b->bd_group,
1453 inode ? inode->i_ino : 0,
1454 blocknr,
1455 "freeing already freed block "
1456 "(bit %u); block bitmap corrupt.",
1457 block);
1458 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
1459 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
1460 mb_regenerate_buddy(e4b);
1461 goto done;
1462 }
1463
1464
1465 if (left_is_free && right_is_free)
1466 e4b->bd_info->bb_fragments--;
1467 else if (!left_is_free && !right_is_free)
1468 e4b->bd_info->bb_fragments++;
1469
1470
1471
1472
1473
1474
1475
1476 if (first & 1) {
1477 first += !left_is_free;
1478 e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
1479 }
1480 if (!(last & 1)) {
1481 last -= !right_is_free;
1482 e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
1483 }
1484
1485 if (first <= last)
1486 mb_buddy_mark_free(e4b, first >> 1, last >> 1);
1487
1488 done:
1489 mb_set_largest_free_order(sb, e4b->bd_info);
1490 mb_check_buddy(e4b);
1491 }
1492
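/*
 * Starting from a free `block`, build the free extent around it (using
 * the buddy bitmaps to jump over whole free chunks) until `needed`
 * clusters are covered or the free run ends; returns the extent length.
 */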
1493 static int mb_find_extent(struct ext4_buddy *e4b, int block,
1494 int needed, struct ext4_free_extent *ex)
1495 {
1496 int next = block;
1497 int max, order;
1498 void *buddy;
1499
1500 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1501 BUG_ON(ex == NULL);
1502
1503 buddy = mb_find_buddy(e4b, 0, &max);
1504 BUG_ON(buddy == NULL);
1505 BUG_ON(block >= max);
1506 if (mb_test_bit(block, buddy)) {
1507 ex->fe_len = 0;
1508 ex->fe_start = 0;
1509 ex->fe_group = 0;
1510 return 0;
1511 }
1512
1513
1514 order = mb_find_order_for_block(e4b, block);
1515 block = block >> order;
1516
1517 ex->fe_len = 1 << order;
1518 ex->fe_start = block << order;
1519 ex->fe_group = e4b->bd_group;
1520
1521
1522 next = next - ex->fe_start;
1523 ex->fe_len -= next;
1524 ex->fe_start += next;
1525
1526 while (needed > ex->fe_len &&
1527 mb_find_buddy(e4b, order, &max)) {
1528
1529 if (block + 1 >= max)
1530 break;
1531
1532 next = (block + 1) * (1 << order);
1533 if (mb_test_bit(next, e4b->bd_bitmap))
1534 break;
1535
1536 order = mb_find_order_for_block(e4b, next);
1537
1538 block = next >> order;
1539 ex->fe_len += 1 << order;
1540 }
1541
1542 if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) {
1543
1544 WARN_ON(1);
1545 ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent "
1546 "block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
1547 block, order, needed, ex->fe_group, ex->fe_start,
1548 ex->fe_len, ex->fe_logical);
1549 ex->fe_len = 0;
1550 ex->fe_start = 0;
1551 ex->fe_group = 0;
1552 }
1553 return ex->fe_len;
1554 }
1555
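/*
 * Mark the extent `ex` as in use: set its range in the block bitmap and
 * walk the buddy bitmaps, splitting higher-order free chunks as needed.
 * The return value packs the tail length and the order of the first
 * split (len | (order << 16)), consumed by ext4_mb_use_best_found().
 */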
1556 static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1557 {
1558 int ord;
1559 int mlen = 0;
1560 int max = 0;
1561 int cur;
1562 int start = ex->fe_start;
1563 int len = ex->fe_len;
1564 unsigned ret = 0;
1565 int len0 = len;
1566 void *buddy;
1567
1568 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
1569 BUG_ON(e4b->bd_group != ex->fe_group);
1570 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1571 mb_check_buddy(e4b);
1572 mb_mark_used_double(e4b, start, len);
1573
1574 e4b->bd_info->bb_free -= len;
1575 if (e4b->bd_info->bb_first_free == start)
1576 e4b->bd_info->bb_first_free += len;
1577
1578
1579 if (start != 0)
1580 mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
1581 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
1582 max = !mb_test_bit(start + len, e4b->bd_bitmap);
1583 if (mlen && max)
1584 e4b->bd_info->bb_fragments++;
1585 else if (!mlen && !max)
1586 e4b->bd_info->bb_fragments--;
1587
1588
1589 while (len) {
1590 ord = mb_find_order_for_block(e4b, start);
1591
1592 if (((start >> ord) << ord) == start && len >= (1 << ord)) {
1593
1594 mlen = 1 << ord;
1595 buddy = mb_find_buddy(e4b, ord, &max);
1596 BUG_ON((start >> ord) >= max);
1597 mb_set_bit(start >> ord, buddy);
1598 e4b->bd_info->bb_counters[ord]--;
1599 start += mlen;
1600 len -= mlen;
1601 BUG_ON(len < 0);
1602 continue;
1603 }
1604
1605
1606 if (ret == 0)
1607 ret = len | (ord << 16);
1608
1609
1610 BUG_ON(ord <= 0);
1611 buddy = mb_find_buddy(e4b, ord, &max);
1612 mb_set_bit(start >> ord, buddy);
1613 e4b->bd_info->bb_counters[ord]--;
1614
1615 ord--;
1616 cur = (start >> ord) & ~1U;
1617 buddy = mb_find_buddy(e4b, ord, &max);
1618 mb_clear_bit(cur, buddy);
1619 mb_clear_bit(cur + 1, buddy);
1620 e4b->bd_info->bb_counters[ord]++;
1621 e4b->bd_info->bb_counters[ord]++;
1622 }
1623 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
1624
1625 ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
1626 mb_check_buddy(e4b);
1627
1628 return ret;
1629 }
1630
1631
1632
1633
1634 static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
1635 struct ext4_buddy *e4b)
1636 {
1637 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1638 int ret;
1639
1640 BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
1641 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1642
1643 ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
1644 ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
1645 ret = mb_mark_used(e4b, &ac->ac_b_ex);
1646
1647
1648
1649 ac->ac_f_ex = ac->ac_b_ex;
1650
1651 ac->ac_status = AC_STATUS_FOUND;
1652 ac->ac_tail = ret & 0xffff;
1653 ac->ac_buddy = ret >> 16;
1654
1655
1656
1657
1658
1659
1660
1661
1662 ac->ac_bitmap_page = e4b->bd_bitmap_page;
1663 get_page(ac->ac_bitmap_page);
1664 ac->ac_buddy_page = e4b->bd_buddy_page;
1665 get_page(ac->ac_buddy_page);
1666
1667 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
1668 spin_lock(&sbi->s_md_lock);
1669 sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
1670 sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
1671 spin_unlock(&sbi->s_md_lock);
1672 }
1673 }
1674
1675
1676
1677
1678
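/*
 * Decide whether scanning should stop: give up after s_mb_max_to_scan
 * candidates, or accept the current best extent once it satisfies the
 * goal length and at least s_mb_min_to_scan extents have been measured.
 */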
1679 static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
1680 struct ext4_buddy *e4b,
1681 int finish_group)
1682 {
1683 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1684 struct ext4_free_extent *bex = &ac->ac_b_ex;
1685 struct ext4_free_extent *gex = &ac->ac_g_ex;
1686 struct ext4_free_extent ex;
1687 int max;
1688
1689 if (ac->ac_status == AC_STATUS_FOUND)
1690 return;
1691
1692
1693
1694 if (ac->ac_found > sbi->s_mb_max_to_scan &&
1695 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1696 ac->ac_status = AC_STATUS_BREAK;
1697 return;
1698 }
1699
1700
1701
1702
1703 if (bex->fe_len < gex->fe_len)
1704 return;
1705
1706 if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
1707 && bex->fe_group == e4b->bd_group) {
1708
1709
1710
1711 max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
1712 if (max >= gex->fe_len) {
1713 ext4_mb_use_best_found(ac, e4b);
1714 return;
1715 }
1716 }
1717 }
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
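/*
 * Compare a found extent against the current best: an exact goal-length
 * match is used immediately; otherwise keep the largest extent while
 * none reaches the goal, and the smallest that still covers it once one
 * does.
 */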
1729 static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1730 struct ext4_free_extent *ex,
1731 struct ext4_buddy *e4b)
1732 {
1733 struct ext4_free_extent *bex = &ac->ac_b_ex;
1734 struct ext4_free_extent *gex = &ac->ac_g_ex;
1735
1736 BUG_ON(ex->fe_len <= 0);
1737 BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1738 BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1739 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
1740
1741 ac->ac_found++;
1742
1743
1744
1745
1746 if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1747 *bex = *ex;
1748 ext4_mb_use_best_found(ac, e4b);
1749 return;
1750 }
1751
1752
1753
1754
1755 if (ex->fe_len == gex->fe_len) {
1756 *bex = *ex;
1757 ext4_mb_use_best_found(ac, e4b);
1758 return;
1759 }
1760
1761
1762
1763
1764 if (bex->fe_len == 0) {
1765 *bex = *ex;
1766 return;
1767 }
1768
1769
1770
1771
1772 if (bex->fe_len < gex->fe_len) {
1773
1774
1775 if (ex->fe_len > bex->fe_len)
1776 *bex = *ex;
1777 } else if (ex->fe_len > gex->fe_len) {
1778
1779
1780
1781 if (ex->fe_len < bex->fe_len)
1782 *bex = *ex;
1783 }
1784
1785 ext4_mb_check_limits(ac, e4b, 0);
1786 }
1787
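/*
 * Re-verify and claim the best-found extent: reload the group's buddy,
 * re-measure the extent under the group lock, and mark it used if it is
 * still available.
 */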
1788 static noinline_for_stack
1789 int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1790 struct ext4_buddy *e4b)
1791 {
1792 struct ext4_free_extent ex = ac->ac_b_ex;
1793 ext4_group_t group = ex.fe_group;
1794 int max;
1795 int err;
1796
1797 BUG_ON(ex.fe_len <= 0);
1798 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1799 if (err)
1800 return err;
1801
1802 ext4_lock_group(ac->ac_sb, group);
1803 max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
1804
1805 if (max > 0) {
1806 ac->ac_b_ex = ex;
1807 ext4_mb_use_best_found(ac, e4b);
1808 }
1809
1810 ext4_unlock_group(ac->ac_sb, group);
1811 ext4_mb_unload_buddy(e4b);
1812
1813 return 0;
1814 }
1815
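/*
 * Try to allocate exactly at the request's goal block; with a stripe
 * geometry the hit must also be stripe-aligned, and a partial hit is
 * accepted only with EXT4_MB_HINT_MERGE.
 */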
1816 static noinline_for_stack
1817 int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1818 struct ext4_buddy *e4b)
1819 {
1820 ext4_group_t group = ac->ac_g_ex.fe_group;
1821 int max;
1822 int err;
1823 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1824 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1825 struct ext4_free_extent ex;
1826
1827 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
1828 return 0;
1829 if (grp->bb_free == 0)
1830 return 0;
1831
1832 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1833 if (err)
1834 return err;
1835
1836 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
1837 ext4_mb_unload_buddy(e4b);
1838 return 0;
1839 }
1840
1841 ext4_lock_group(ac->ac_sb, group);
1842 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
1843 ac->ac_g_ex.fe_len, &ex);
1844 ex.fe_logical = 0xDEADFA11;
1845
1846 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1847 ext4_fsblk_t start;
1848
1849 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
1850 ex.fe_start;
1851
1852 if (do_div(start, sbi->s_stripe) == 0) {
1853 ac->ac_found++;
1854 ac->ac_b_ex = ex;
1855 ext4_mb_use_best_found(ac, e4b);
1856 }
1857 } else if (max >= ac->ac_g_ex.fe_len) {
1858 BUG_ON(ex.fe_len <= 0);
1859 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1860 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1861 ac->ac_found++;
1862 ac->ac_b_ex = ex;
1863 ext4_mb_use_best_found(ac, e4b);
1864 } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
1865
1866
1867 BUG_ON(ex.fe_len <= 0);
1868 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1869 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1870 ac->ac_found++;
1871 ac->ac_b_ex = ex;
1872 ext4_mb_use_best_found(ac, e4b);
1873 }
1874 ext4_unlock_group(ac->ac_sb, group);
1875 ext4_mb_unload_buddy(e4b);
1876
1877 return 0;
1878 }
1879
1880
1881
1882
1883
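/*
 * Order-2 scan: for power-of-two requests, take the first free chunk of
 * order >= ac_2order straight from the buddy bitmaps.
 */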
1884 static noinline_for_stack
1885 void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1886 struct ext4_buddy *e4b)
1887 {
1888 struct super_block *sb = ac->ac_sb;
1889 struct ext4_group_info *grp = e4b->bd_info;
1890 void *buddy;
1891 int i;
1892 int k;
1893 int max;
1894
1895 BUG_ON(ac->ac_2order <= 0);
1896 for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
1897 if (grp->bb_counters[i] == 0)
1898 continue;
1899
1900 buddy = mb_find_buddy(e4b, i, &max);
1901 BUG_ON(buddy == NULL);
1902
1903 k = mb_find_next_zero_bit(buddy, max, 0);
1904 BUG_ON(k >= max);
1905
1906 ac->ac_found++;
1907
1908 ac->ac_b_ex.fe_len = 1 << i;
1909 ac->ac_b_ex.fe_start = k << i;
1910 ac->ac_b_ex.fe_group = e4b->bd_group;
1911
1912 ext4_mb_use_best_found(ac, e4b);
1913
1914 BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
1915
1916 if (EXT4_SB(sb)->s_mb_stats)
1917 atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
1918
1919 break;
1920 }
1921 }
1922
1923
1924
1925
1926
1927
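/*
 * Exhaustive scan: walk the block bitmap extent by extent, measuring
 * each free extent until the allocation context is satisfied or the
 * group's free clusters are exhausted.
 */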
1928 static noinline_for_stack
1929 void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1930 struct ext4_buddy *e4b)
1931 {
1932 struct super_block *sb = ac->ac_sb;
1933 void *bitmap = e4b->bd_bitmap;
1934 struct ext4_free_extent ex;
1935 int i;
1936 int free;
1937
1938 free = e4b->bd_info->bb_free;
1939 if (WARN_ON(free <= 0))
1940 return;
1941
1942 i = e4b->bd_info->bb_first_free;
1943
1944 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
1945 i = mb_find_next_zero_bit(bitmap,
1946 EXT4_CLUSTERS_PER_GROUP(sb), i);
1947 if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
1948
1949
1950
1951
1952
1953 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1954 "%d free clusters as per "
1955 "group info. But bitmap says 0",
1956 free);
1957 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
1958 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
1959 break;
1960 }
1961
1962 mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
1963 if (WARN_ON(ex.fe_len <= 0))
1964 break;
1965 if (free < ex.fe_len) {
1966 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1967 "%d free clusters as per "
1968 "group info. But got %d blocks",
1969 free, ex.fe_len);
1970 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
1971 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
1972
1973
1974
1975
1976
1977 break;
1978 }
1979 ex.fe_logical = 0xDEADC0DE;
1980 ext4_mb_measure_extent(ac, &ex, e4b);
1981
1982 i += ex.fe_len;
1983 free -= ex.fe_len;
1984 }
1985
1986 ext4_mb_check_limits(ac, e4b, 1);
1987 }
1988
1989
1990
1991
1992
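/*
 * Stripe-aligned scan (s_stripe > 0): probe only stripe-aligned offsets
 * and accept the first free extent covering at least one full stripe.
 */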
1993 static noinline_for_stack
1994 void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1995 struct ext4_buddy *e4b)
1996 {
1997 struct super_block *sb = ac->ac_sb;
1998 struct ext4_sb_info *sbi = EXT4_SB(sb);
1999 void *bitmap = e4b->bd_bitmap;
2000 struct ext4_free_extent ex;
2001 ext4_fsblk_t first_group_block;
2002 ext4_fsblk_t a;
2003 ext4_grpblk_t i;
2004 int max;
2005
2006 BUG_ON(sbi->s_stripe == 0);
2007
2008
2009 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
2010
2011 a = first_group_block + sbi->s_stripe - 1;
2012 do_div(a, sbi->s_stripe);
2013 i = (a * sbi->s_stripe) - first_group_block;
2014
2015 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
2016 if (!mb_test_bit(i, bitmap)) {
2017 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
2018 if (max >= sbi->s_stripe) {
2019 ac->ac_found++;
2020 ex.fe_logical = 0xDEADF00D;
2021 ac->ac_b_ex = ex;
2022 ext4_mb_use_best_found(ac, e4b);
2023 break;
2024 }
2025 }
2026 i += sbi->s_stripe;
2027 }
2028 }
2029
2030
2031
2032
2033
2034
2035
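/*
 * Cheap suitability check for a group at the given criterion (cr) pass:
 * returns 1 if the group may satisfy the request, 0 to skip it, or a
 * negative error code if buddy initialization fails.
 */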
2036 static int ext4_mb_good_group(struct ext4_allocation_context *ac,
2037 ext4_group_t group, int cr)
2038 {
2039 unsigned free, fragments;
2040 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
2041 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
2042
2043 BUG_ON(cr < 0 || cr >= 4);
2044
2045 free = grp->bb_free;
2046 if (free == 0)
2047 return 0;
2048 if (cr <= 2 && free < ac->ac_g_ex.fe_len)
2049 return 0;
2050
2051 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
2052 return 0;
2053
2054
2055 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
2056 int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
2057 if (ret)
2058 return ret;
2059 }
2060
2061 fragments = grp->bb_fragments;
2062 if (fragments == 0)
2063 return 0;
2064
2065 switch (cr) {
2066 case 0:
2067 BUG_ON(ac->ac_2order == 0);
2068
2069
2070 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
2071 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
2072 ((group % flex_size) == 0))
2073 return 0;
2074
2075 if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
2076 (free / fragments) >= ac->ac_g_ex.fe_len)
2077 return 1;
2078
2079 if (grp->bb_largest_free_order < ac->ac_2order)
2080 return 0;
2081
2082 return 1;
2083 case 1:
2084 if ((free / fragments) >= ac->ac_g_ex.fe_len)
2085 return 1;
2086 break;
2087 case 2:
2088 if (free >= ac->ac_g_ex.fe_len)
2089 return 1;
2090 break;
2091 case 3:
2092 return 1;
2093 default:
2094 BUG();
2095 }
2096
2097 return 0;
2098 }
2099
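/*
 * Main allocator loop: try the goal block first, then sweep all groups
 * under progressively looser criteria cr = 0..3 (order-2 buddy scan,
 * then stripe-aligned or complex scans), starting each sweep at the
 * goal group.
 */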
2100 static noinline_for_stack int
2101 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
2102 {
2103 ext4_group_t ngroups, group, i;
2104 int cr;
2105 int err = 0, first_err = 0;
2106 struct ext4_sb_info *sbi;
2107 struct super_block *sb;
2108 struct ext4_buddy e4b;
2109
2110 sb = ac->ac_sb;
2111 sbi = EXT4_SB(sb);
2112 ngroups = ext4_get_groups_count(sb);
2113
2114 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
2115 ngroups = sbi->s_blockfile_groups;
2116
2117 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
2118
2119
2120 err = ext4_mb_find_by_goal(ac, &e4b);
2121 if (err || ac->ac_status == AC_STATUS_FOUND)
2122 goto out;
2123
2124 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
2125 goto out;
2126
2127
2128
2129
2130
2131
2132 i = fls(ac->ac_g_ex.fe_len);
2133 ac->ac_2order = 0;
2134
2135
2136
2137
2138
2139
2140
2141 if (i >= sbi->s_mb_order2_reqs && i <= sb->s_blocksize_bits + 2) {
2142
2143
2144
2145 if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
2146 ac->ac_2order = array_index_nospec(i - 1,
2147 sb->s_blocksize_bits + 2);
2148 }
2149
2150
2151 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
2152
2153 spin_lock(&sbi->s_md_lock);
2154 ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
2155 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
2156 spin_unlock(&sbi->s_md_lock);
2157 }
2158
2159
2160 cr = ac->ac_2order ? 0 : 1;
2161
2162
2163
2164
2165 repeat:
2166 for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
2167 ac->ac_criteria = cr;
2168
2169
2170
2171
2172 group = ac->ac_g_ex.fe_group;
2173
2174 for (i = 0; i < ngroups; group++, i++) {
2175 int ret = 0;
2176 cond_resched();
2177
2178
2179
2180
2181 if (group >= ngroups)
2182 group = 0;
2183
2184
2185 ret = ext4_mb_good_group(ac, group, cr);
2186 if (ret <= 0) {
2187 if (!first_err)
2188 first_err = ret;
2189 continue;
2190 }
2191
2192 err = ext4_mb_load_buddy(sb, group, &e4b);
2193 if (err)
2194 goto out;
2195
2196 ext4_lock_group(sb, group);
2197
2198
2199
2200
2201
2202 ret = ext4_mb_good_group(ac, group, cr);
2203 if (ret <= 0) {
2204 ext4_unlock_group(sb, group);
2205 ext4_mb_unload_buddy(&e4b);
2206 if (!first_err)
2207 first_err = ret;
2208 continue;
2209 }
2210
2211 ac->ac_groups_scanned++;
2212 if (cr == 0)
2213 ext4_mb_simple_scan_group(ac, &e4b);
2214 else if (cr == 1 && sbi->s_stripe &&
2215 !(ac->ac_g_ex.fe_len % sbi->s_stripe))
2216 ext4_mb_scan_aligned(ac, &e4b);
2217 else
2218 ext4_mb_complex_scan_group(ac, &e4b);
2219
2220 ext4_unlock_group(sb, group);
2221 ext4_mb_unload_buddy(&e4b);
2222
2223 if (ac->ac_status != AC_STATUS_CONTINUE)
2224 break;
2225 }
2226 }
2227
2228 if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
2229 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
2230
2231
2232
2233
2234
2235 ext4_mb_try_best_found(ac, &e4b);
2236 if (ac->ac_status != AC_STATUS_FOUND) {
2237
2238
2239
2240
2241
2242
2243 ac->ac_b_ex.fe_group = 0;
2244 ac->ac_b_ex.fe_start = 0;
2245 ac->ac_b_ex.fe_len = 0;
2246 ac->ac_status = AC_STATUS_CONTINUE;
2247 ac->ac_flags |= EXT4_MB_HINT_FIRST;
2248 cr = 3;
2249 atomic_inc(&sbi->s_mb_lost_chunks);
2250 goto repeat;
2251 }
2252 }
2253 out:
2254 if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
2255 err = first_err;
2256 return err;
2257 }
2258
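/* seq_file interface backing the per-device mb_groups file in procfs. */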
2259 static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2260 {
2261 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2262 ext4_group_t group;
2263
2264 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2265 return NULL;
2266 group = *pos + 1;
2267 return (void *) ((unsigned long) group);
2268 }
2269
2270 static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2271 {
2272 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2273 ext4_group_t group;
2274
2275 ++*pos;
2276 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2277 return NULL;
2278 group = *pos + 1;
2279 return (void *) ((unsigned long) group);
2280 }
2281
2282 static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2283 {
2284 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2285 ext4_group_t group = (ext4_group_t) ((unsigned long) v);
2286 int i;
2287 int err, buddy_loaded = 0;
2288 struct ext4_buddy e4b;
2289 struct ext4_group_info *grinfo;
2290 unsigned char blocksize_bits = min_t(unsigned char,
2291 sb->s_blocksize_bits,
2292 EXT4_MAX_BLOCK_LOG_SIZE);
2293 struct sg {
2294 struct ext4_group_info info;
2295 ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
2296 } sg;
2297
2298 group--;
2299 if (group == 0)
2300 seq_puts(seq, "#group: free frags first ["
2301 " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
2302 " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
2303
2304 i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
2305 sizeof(struct ext4_group_info);
2306
2307 grinfo = ext4_get_group_info(sb, group);
2308
2309 if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
2310 err = ext4_mb_load_buddy(sb, group, &e4b);
2311 if (err) {
2312 seq_printf(seq, "#%-5u: I/O error\n", group);
2313 return 0;
2314 }
2315 buddy_loaded = 1;
2316 }
2317
2318 memcpy(&sg, ext4_get_group_info(sb, group), i);
2319
2320 if (buddy_loaded)
2321 ext4_mb_unload_buddy(&e4b);
2322
2323 seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
2324 sg.info.bb_fragments, sg.info.bb_first_free);
2325 for (i = 0; i <= 13; i++)
2326 seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
2327 sg.info.bb_counters[i] : 0);
2328 seq_printf(seq, " ]\n");
2329
2330 return 0;
2331 }
2332
2333 static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
2334 {
2335 }
2336
2337 const struct seq_operations ext4_mb_seq_groups_ops = {
2338 .start = ext4_mb_seq_groups_start,
2339 .next = ext4_mb_seq_groups_next,
2340 .stop = ext4_mb_seq_groups_stop,
2341 .show = ext4_mb_seq_groups_show,
2342 };
2343
2344 static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2345 {
2346 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2347 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2348
2349 BUG_ON(!cachep);
2350 return cachep;
2351 }
2352
2353
2354
2355
2356
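/*
 * Size (or grow) the top-level s_group_info array: an RCU-managed array
 * of pointers to blocks of EXT4_DESC_PER_BLOCK group-info pointers.
 */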
2357 int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
2358 {
2359 struct ext4_sb_info *sbi = EXT4_SB(sb);
2360 unsigned size;
2361 struct ext4_group_info ***old_groupinfo, ***new_groupinfo;
2362
2363 size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
2364 EXT4_DESC_PER_BLOCK_BITS(sb);
2365 if (size <= sbi->s_group_info_size)
2366 return 0;
2367
2368 size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
2369 new_groupinfo = kvzalloc(size, GFP_KERNEL);
2370 if (!new_groupinfo) {
2371 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2372 return -ENOMEM;
2373 }
2374 rcu_read_lock();
2375 old_groupinfo = rcu_dereference(sbi->s_group_info);
2376 if (old_groupinfo)
2377 memcpy(new_groupinfo, old_groupinfo,
2378 sbi->s_group_info_size * sizeof(*sbi->s_group_info));
2379 rcu_read_unlock();
2380 rcu_assign_pointer(sbi->s_group_info, new_groupinfo);
2381 sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
2382 if (old_groupinfo)
2383 ext4_kvfree_array_rcu(old_groupinfo);
2384 ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
2385 sbi->s_group_info_size);
2386 return 0;
2387 }
2388
2389
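/* Create and initialize ext4_group_info data for the given group. */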
2390 int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2391 struct ext4_group_desc *desc)
2392 {
2393 int i;
2394 int metalen = 0;
2395 int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb);
2396 struct ext4_sb_info *sbi = EXT4_SB(sb);
2397 struct ext4_group_info **meta_group_info;
2398 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2399
2400
2401
2402
2403
2404
2405 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2406 metalen = sizeof(*meta_group_info) <<
2407 EXT4_DESC_PER_BLOCK_BITS(sb);
2408 meta_group_info = kmalloc(metalen, GFP_NOFS);
2409 if (meta_group_info == NULL) {
2410 ext4_msg(sb, KERN_ERR, "can't allocate mem "
2411 "for a buddy group");
2412 goto exit_meta_group_info;
2413 }
2414 rcu_read_lock();
2415 rcu_dereference(sbi->s_group_info)[idx] = meta_group_info;
2416 rcu_read_unlock();
2417 }
2418
2419 meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx);
2420 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2421
2422 meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
2423 if (meta_group_info[i] == NULL) {
2424 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
2425 goto exit_group_info;
2426 }
2427 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2428 &(meta_group_info[i]->bb_state));
2429
2430
2431
2432
2433
2434 if (ext4_has_group_desc_csum(sb) &&
2435 (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
2436 meta_group_info[i]->bb_free =
2437 ext4_free_clusters_after_init(sb, group, desc);
2438 } else {
2439 meta_group_info[i]->bb_free =
2440 ext4_free_group_clusters(sb, desc);
2441 }
2442
2443 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2444 init_rwsem(&meta_group_info[i]->alloc_sem);
2445 meta_group_info[i]->bb_free_root = RB_ROOT;
2446 meta_group_info[i]->bb_largest_free_order = -1;
2447
2448 #ifdef DOUBLE_CHECK
2449 {
2450 struct buffer_head *bh;
2451 meta_group_info[i]->bb_bitmap =
2452 kmalloc(sb->s_blocksize, GFP_NOFS);
2453 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
2454 bh = ext4_read_block_bitmap(sb, group);
2455 BUG_ON(IS_ERR_OR_NULL(bh));
2456 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
2457 sb->s_blocksize);
2458 put_bh(bh);
2459 }
2460 #endif
2461
2462 return 0;
2463
2464 exit_group_info:
2465
2466 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2467 struct ext4_group_info ***group_info;
2468
2469 rcu_read_lock();
2470 group_info = rcu_dereference(sbi->s_group_info);
2471 kfree(group_info[idx]);
2472 group_info[idx] = NULL;
2473 rcu_read_unlock();
2474 }
2475 exit_meta_group_info:
2476 return -ENOMEM;
2477 }
2478
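/*
 * Allocate the group-info structures for every group and the in-memory
 * buddy-cache inode that backs the bitmap/buddy pages.
 */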
2479 static int ext4_mb_init_backend(struct super_block *sb)
2480 {
2481 ext4_group_t ngroups = ext4_get_groups_count(sb);
2482 ext4_group_t i;
2483 struct ext4_sb_info *sbi = EXT4_SB(sb);
2484 int err;
2485 struct ext4_group_desc *desc;
2486 struct ext4_group_info ***group_info;
2487 struct kmem_cache *cachep;
2488
2489 err = ext4_mb_alloc_groupinfo(sb, ngroups);
2490 if (err)
2491 return err;
2492
2493 sbi->s_buddy_cache = new_inode(sb);
2494 if (sbi->s_buddy_cache == NULL) {
2495 ext4_msg(sb, KERN_ERR, "can't get new inode");
2496 goto err_freesgi;
2497 }
2498
2499
2500
2501
2502 sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
2503 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
2504 for (i = 0; i < ngroups; i++) {
2505 cond_resched();
2506 desc = ext4_get_group_desc(sb, i, NULL);
2507 if (desc == NULL) {
2508 ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
2509 goto err_freebuddy;
2510 }
2511 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
2512 goto err_freebuddy;
2513 }
2514
2515 return 0;
2516
2517 err_freebuddy:
2518 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2519 while (i-- > 0)
2520 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2521 i = sbi->s_group_info_size;
2522 rcu_read_lock();
2523 group_info = rcu_dereference(sbi->s_group_info);
2524 while (i-- > 0)
2525 kfree(group_info[i]);
2526 rcu_read_unlock();
2527 iput(sbi->s_buddy_cache);
2528 err_freesgi:
2529 rcu_read_lock();
2530 kvfree(rcu_dereference(sbi->s_group_info));
2531 rcu_read_unlock();
2532 return -ENOMEM;
2533 }
2534
2535 static void ext4_groupinfo_destroy_slabs(void)
2536 {
2537 int i;
2538
2539 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2540 kmem_cache_destroy(ext4_groupinfo_caches[i]);
2541 ext4_groupinfo_caches[i] = NULL;
2542 }
2543 }
2544
2545 static int ext4_groupinfo_create_slab(size_t size)
2546 {
2547 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
2548 int slab_size;
2549 int blocksize_bits = order_base_2(size);
2550 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2551 struct kmem_cache *cachep;
2552
2553 if (cache_index >= NR_GRPINFO_CACHES)
2554 return -EINVAL;
2555
2556 if (unlikely(cache_index < 0))
2557 cache_index = 0;
2558
2559 mutex_lock(&ext4_grpinfo_slab_create_mutex);
2560 if (ext4_groupinfo_caches[cache_index]) {
2561 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2562 return 0;
2563 }
2564
2565 slab_size = offsetof(struct ext4_group_info,
2566 bb_counters[blocksize_bits + 2]);
2567
2568 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
2569 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2570 NULL);
2571
2572 ext4_groupinfo_caches[cache_index] = cachep;
2573
2574 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2575 if (!cachep) {
2576 printk(KERN_EMERG
2577 "EXT4-fs: no memory for groupinfo slab cache\n");
2578 return -ENOMEM;
2579 }
2580
2581 return 0;
2582 }
2583
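/*
 * Set up the multi-block allocator: the per-order offset/max tables
 * used by the buddy cache, the allocator tunables, the per-CPU
 * locality groups and, last, the per-group backend structures.
 */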
2584 int ext4_mb_init(struct super_block *sb)
2585 {
2586 struct ext4_sb_info *sbi = EXT4_SB(sb);
2587 unsigned i, j;
2588 unsigned offset, offset_incr;
2589 unsigned max;
2590 int ret;
2591
2592 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2593
2594 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2595 if (sbi->s_mb_offsets == NULL) {
2596 ret = -ENOMEM;
2597 goto out;
2598 }
2599
2600 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
2601 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2602 if (sbi->s_mb_maxs == NULL) {
2603 ret = -ENOMEM;
2604 goto out;
2605 }
2606
2607 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
2608 if (ret < 0)
2609 goto out;
2610
2611 /* order 0 is regular bitmap */
2612 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
2613 sbi->s_mb_offsets[0] = 0;
2614
2615 i = 1;
2616 offset = 0;
2617 offset_incr = 1 << (sb->s_blocksize_bits - 1);
2618 max = sb->s_blocksize << 2;
2619 do {
2620 sbi->s_mb_offsets[i] = offset;
2621 sbi->s_mb_maxs[i] = max;
2622 offset += offset_incr;
2623 offset_incr = offset_incr >> 1;
2624 max = max >> 1;
2625 i++;
2626 } while (i <= sb->s_blocksize_bits + 1);
2627
2628 spin_lock_init(&sbi->s_md_lock);
2629 spin_lock_init(&sbi->s_bal_lock);
2630 sbi->s_mb_free_pending = 0;
2631 INIT_LIST_HEAD(&sbi->s_freed_data_list);
2632
2633 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
2634 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
2635 sbi->s_mb_stats = MB_DEFAULT_STATS;
2636 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2637 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2638
2639 
2640 
2641 
2642 
2643 
2644 /*
2645  * The default group preallocation is given in blocks
2646  * (MB_DEFAULT_GROUP_PREALLOC); convert it to clusters and keep a
2647  * floor of 32 clusters so that filesystems with large clusters
2648  * still get a useful per-group preallocation.
2649  */
2650 sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
2651 sbi->s_cluster_bits, 32);
2652 /*
2653  * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
2654  * to the lowest multiple of s_stripe which is bigger than
2655  * the s_mb_group_prealloc as determined above. We want
2656  * the preallocation size to be an exact multiple of the
2657  * RAID stripe size so that preallocations don't fragment
2658  * the stripes.
2659  */
2660 if (sbi->s_stripe > 1) {
2661 sbi->s_mb_group_prealloc = roundup(
2662 sbi->s_mb_group_prealloc, sbi->s_stripe);
2663 }
2664
2665 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2666 if (sbi->s_locality_groups == NULL) {
2667 ret = -ENOMEM;
2668 goto out;
2669 }
2670 for_each_possible_cpu(i) {
2671 struct ext4_locality_group *lg;
2672 lg = per_cpu_ptr(sbi->s_locality_groups, i);
2673 mutex_init(&lg->lg_mutex);
2674 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2675 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
2676 spin_lock_init(&lg->lg_prealloc_lock);
2677 }
2678
2679 /* init file for buddy data */
2680 ret = ext4_mb_init_backend(sb);
2681 if (ret != 0)
2682 goto out_free_locality_groups;
2683
2684 return 0;
2685
2686 out_free_locality_groups:
2687 free_percpu(sbi->s_locality_groups);
2688 sbi->s_locality_groups = NULL;
2689 out:
2690 kfree(sbi->s_mb_offsets);
2691 sbi->s_mb_offsets = NULL;
2692 kfree(sbi->s_mb_maxs);
2693 sbi->s_mb_maxs = NULL;
2694 return ret;
2695 }
2696
2697
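/* needs to be called with the ext4 group lock held */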
2698 static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2699 {
2700 struct ext4_prealloc_space *pa;
2701 struct list_head *cur, *tmp;
2702 int count = 0;
2703
2704 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
2705 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2706 list_del(&pa->pa_group_list);
2707 count++;
2708 kmem_cache_free(ext4_pspace_cachep, pa);
2709 }
2710 if (count)
2711 mb_debug(1, "mballoc: %u PAs left\n", count);
2712
2713 }
2714
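/*
 * Tear down mballoc state at unmount: per-group info (discarding any
 * leftover preallocations), the buddy cache inode, the offset/max
 * tables and the per-CPU locality groups; allocator statistics are
 * logged first when mb_stats is enabled.
 */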
2715 int ext4_mb_release(struct super_block *sb)
2716 {
2717 ext4_group_t ngroups = ext4_get_groups_count(sb);
2718 ext4_group_t i;
2719 int num_meta_group_infos;
2720 struct ext4_group_info *grinfo, ***group_info;
2721 struct ext4_sb_info *sbi = EXT4_SB(sb);
2722 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2723
2724 if (sbi->s_group_info) {
2725 for (i = 0; i < ngroups; i++) {
2726 cond_resched();
2727 grinfo = ext4_get_group_info(sb, i);
2728 #ifdef DOUBLE_CHECK
2729 kfree(grinfo->bb_bitmap);
2730 #endif
2731 ext4_lock_group(sb, i);
2732 ext4_mb_cleanup_pa(grinfo);
2733 ext4_unlock_group(sb, i);
2734 kmem_cache_free(cachep, grinfo);
2735 }
2736 num_meta_group_infos = (ngroups +
2737 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2738 EXT4_DESC_PER_BLOCK_BITS(sb);
2739 rcu_read_lock();
2740 group_info = rcu_dereference(sbi->s_group_info);
2741 for (i = 0; i < num_meta_group_infos; i++)
2742 kfree(group_info[i]);
2743 kvfree(group_info);
2744 rcu_read_unlock();
2745 }
2746 kfree(sbi->s_mb_offsets);
2747 kfree(sbi->s_mb_maxs);
2748 iput(sbi->s_buddy_cache);
2749 if (sbi->s_mb_stats) {
2750 ext4_msg(sb, KERN_INFO,
2751 "mballoc: %u blocks %u reqs (%u success)",
2752 atomic_read(&sbi->s_bal_allocated),
2753 atomic_read(&sbi->s_bal_reqs),
2754 atomic_read(&sbi->s_bal_success));
2755 ext4_msg(sb, KERN_INFO,
2756 "mballoc: %u extents scanned, %u goal hits, "
2757 "%u 2^N hits, %u breaks, %u lost",
2758 atomic_read(&sbi->s_bal_ex_scanned),
2759 atomic_read(&sbi->s_bal_goals),
2760 atomic_read(&sbi->s_bal_2orders),
2761 atomic_read(&sbi->s_bal_breaks),
2762 atomic_read(&sbi->s_mb_lost_chunks));
2763 ext4_msg(sb, KERN_INFO,
2764 "mballoc: %lu generated and it took %Lu",
2765 sbi->s_mb_buddies_generated,
2766 sbi->s_mb_generation_time);
2767 ext4_msg(sb, KERN_INFO,
2768 "mballoc: %u preallocated, %u discarded",
2769 atomic_read(&sbi->s_mb_preallocated),
2770 atomic_read(&sbi->s_mb_discarded));
2771 }
2772
2773 free_percpu(sbi->s_locality_groups);
2774
2775 return 0;
2776 }
2777
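/*
 * Turn a cluster range in @block_group into a block range and issue a
 * discard for it, chained onto *@biop when the caller is batching
 * bios, otherwise synchronously via sb_issue_discard().
 */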
2778 static inline int ext4_issue_discard(struct super_block *sb,
2779 ext4_group_t block_group, ext4_grpblk_t cluster, int count,
2780 struct bio **biop)
2781 {
2782 ext4_fsblk_t discard_block;
2783
2784 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2785 ext4_group_first_block_no(sb, block_group));
2786 count = EXT4_C2B(EXT4_SB(sb), count);
2787 trace_ext4_discard_blocks(sb,
2788 (unsigned long long) discard_block, count);
2789 if (biop) {
2790 return __blkdev_issue_discard(sb->s_bdev,
2791 (sector_t)discard_block << (sb->s_blocksize_bits - 9),
2792 (sector_t)count << (sb->s_blocksize_bits - 9),
2793 GFP_NOFS, 0, biop);
2794 } else
2795 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2796 }
2797
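
/*
 * Return the freed clusters of one committed extent to the buddy
 * cache and remove the entry from the group's free rb tree.
 */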
2798 static void ext4_free_data_in_buddy(struct super_block *sb,
2799 struct ext4_free_data *entry)
2800 {
2801 struct ext4_buddy e4b;
2802 struct ext4_group_info *db;
2803 int err, count = 0, count2 = 0;
2804
2805 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2806 entry->efd_count, entry->efd_group, entry);
2807
2808 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
2809 /* we expect to find existing buddy because it's pinned */
2810 BUG_ON(err != 0);
2811
2812 spin_lock(&EXT4_SB(sb)->s_md_lock);
2813 EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
2814 spin_unlock(&EXT4_SB(sb)->s_md_lock);
2815
2816 db = e4b.bd_info;
2817
2818 count += entry->efd_count;
2819 count2++;
2820 ext4_lock_group(sb, entry->efd_group);
2821
2822 rb_erase(&entry->efd_node, &(db->bb_free_root));
2823 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
2824
2825 /*
2826  * Clear the trimmed flag for the group so that the next
2827  * ext4_trim_fs can trim it.
2828  * If the volume is mounted with -o discard, online discard
2829  * is supported and the free blocks will be trimmed online.
2830  */
2831 if (!test_opt(sb, DISCARD))
2832 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2833
2834 if (!db->bb_free_root.rb_node) {
2835 /* No more items in the per group rb tree
2836  * balance refcounts from ext4_mb_free_metadata()
2837  */
2838 put_page(e4b.bd_buddy_page);
2839 put_page(e4b.bd_bitmap_page);
2840 }
2841 ext4_unlock_group(sb, entry->efd_group);
2842 kmem_cache_free(ext4_free_data_cachep, entry);
2843 ext4_mb_unload_buddy(&e4b);
2844
2845 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2846 }
2847
2848
2849
2850
2851
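/*
 * Called from the jbd2 commit path once @commit_tid has committed:
 * clusters freed in that transaction can now be discarded (with
 * -o discard) and handed back to the buddy cache for reuse.
 */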
2852 void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
2853 {
2854 struct ext4_sb_info *sbi = EXT4_SB(sb);
2855 struct ext4_free_data *entry, *tmp;
2856 struct bio *discard_bio = NULL;
2857 struct list_head freed_data_list;
2858 struct list_head *cut_pos = NULL;
2859 int err;
2860
2861 INIT_LIST_HEAD(&freed_data_list);
2862
2863 spin_lock(&sbi->s_md_lock);
2864 list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
2865 if (entry->efd_tid != commit_tid)
2866 break;
2867 cut_pos = &entry->efd_list;
2868 }
2869 if (cut_pos)
2870 list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
2871 cut_pos);
2872 spin_unlock(&sbi->s_md_lock);
2873
2874 if (test_opt(sb, DISCARD)) {
2875 list_for_each_entry(entry, &freed_data_list, efd_list) {
2876 err = ext4_issue_discard(sb, entry->efd_group,
2877 entry->efd_start_cluster,
2878 entry->efd_count,
2879 &discard_bio);
2880 if (err && err != -EOPNOTSUPP) {
2881 ext4_msg(sb, KERN_WARNING, "discard request in"
2882 " group:%d block:%d count:%d failed"
2883 " with %d", entry->efd_group,
2884 entry->efd_start_cluster,
2885 entry->efd_count, err);
2886 } else if (err == -EOPNOTSUPP)
2887 break;
2888 }
2889
2890 if (discard_bio) {
2891 submit_bio_wait(discard_bio);
2892 bio_put(discard_bio);
2893 }
2894 }
2895
2896 list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
2897 ext4_free_data_in_buddy(sb, entry);
2898 }
2899
2900 int __init ext4_init_mballoc(void)
2901 {
2902 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2903 SLAB_RECLAIM_ACCOUNT);
2904 if (ext4_pspace_cachep == NULL)
2905 return -ENOMEM;
2906
2907 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2908 SLAB_RECLAIM_ACCOUNT);
2909 if (ext4_ac_cachep == NULL) {
2910 kmem_cache_destroy(ext4_pspace_cachep);
2911 return -ENOMEM;
2912 }
2913
2914 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
2915 SLAB_RECLAIM_ACCOUNT);
2916 if (ext4_free_data_cachep == NULL) {
2917 kmem_cache_destroy(ext4_pspace_cachep);
2918 kmem_cache_destroy(ext4_ac_cachep);
2919 return -ENOMEM;
2920 }
2921 return 0;
2922 }
2923
2924 void ext4_exit_mballoc(void)
2925 {
2926 /*
2927  * Wait for completion of call_rcu()'s on ext4_pspace_cachep
2928  * before destroying the slab cache.
2929  */
2930 rcu_barrier();
2931 kmem_cache_destroy(ext4_pspace_cachep);
2932 kmem_cache_destroy(ext4_ac_cachep);
2933 kmem_cache_destroy(ext4_free_data_cachep);
2934 ext4_groupinfo_destroy_slabs();
2935 }
2936
2937
2938
2939
2940
2941
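/*
 * Mark the blocks chosen by the allocator (ac->ac_b_ex) used in the
 * on-disk bitmap and group descriptor and update the free cluster
 * accounting, journaling both metadata buffers.
 */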
2942 static noinline_for_stack int
2943 ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2944 handle_t *handle, unsigned int reserv_clstrs)
2945 {
2946 struct buffer_head *bitmap_bh = NULL;
2947 struct ext4_group_desc *gdp;
2948 struct buffer_head *gdp_bh;
2949 struct ext4_sb_info *sbi;
2950 struct super_block *sb;
2951 ext4_fsblk_t block;
2952 int err, len;
2953
2954 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
2955 BUG_ON(ac->ac_b_ex.fe_len <= 0);
2956
2957 sb = ac->ac_sb;
2958 sbi = EXT4_SB(sb);
2959
2960 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
2961 if (IS_ERR(bitmap_bh)) {
2962 err = PTR_ERR(bitmap_bh);
2963 bitmap_bh = NULL;
2964 goto out_err;
2965 }
2966
2967 BUFFER_TRACE(bitmap_bh, "getting write access");
2968 err = ext4_journal_get_write_access(handle, bitmap_bh);
2969 if (err)
2970 goto out_err;
2971
2972 err = -EIO;
2973 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
2974 if (!gdp)
2975 goto out_err;
2976
2977 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2978 ext4_free_group_clusters(sb, gdp));
2979
2980 BUFFER_TRACE(gdp_bh, "get_write_access");
2981 err = ext4_journal_get_write_access(handle, gdp_bh);
2982 if (err)
2983 goto out_err;
2984
2985 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2986
2987 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2988 if (!ext4_data_block_valid(sbi, block, len)) {
2989 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2990 "fs metadata", block, block+len);
2991 /* File system mounted not to panic on error
2992  * Fix the bitmap and return EFSCORRUPTED
2993  * We leak some of the blocks here.
2994  */
2995 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2996 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2997 ac->ac_b_ex.fe_len);
2998 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2999 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3000 if (!err)
3001 err = -EFSCORRUPTED;
3002 goto out_err;
3003 }
3004
3005 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3006 #ifdef AGGRESSIVE_CHECK
3007 {
3008 int i;
3009 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
3010 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
3011 bitmap_bh->b_data));
3012 }
3013 }
3014 #endif
3015 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3016 ac->ac_b_ex.fe_len);
3017 if (ext4_has_group_desc_csum(sb) &&
3018 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
3019 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3020 ext4_free_group_clusters_set(sb, gdp,
3021 ext4_free_clusters_after_init(sb,
3022 ac->ac_b_ex.fe_group, gdp));
3023 }
3024 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
3025 ext4_free_group_clusters_set(sb, gdp, len);
3026 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
3027 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
3028
3029 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3030 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
3031
3032 
3033 /* Now reduce the dirty cluster count; it should not go negative */
3034 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
3035 /* release all the reserved blocks if non delalloc */
3036 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
3037 reserv_clstrs);
3038
3039 if (sbi->s_log_groups_per_flex) {
3040 ext4_group_t flex_group = ext4_flex_group(sbi,
3041 ac->ac_b_ex.fe_group);
3042 atomic64_sub(ac->ac_b_ex.fe_len,
3043 &sbi_array_rcu_deref(sbi, s_flex_groups,
3044 flex_group)->free_clusters);
3045 }
3046
3047 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3048 if (err)
3049 goto out_err;
3050 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
3051
3052 out_err:
3053 brelse(bitmap_bh);
3054 return err;
3055 }
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
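/*
 * Normalize a locality-group request: group requests are simply sized
 * to s_mb_group_prealloc, which is tunable via
 * /sys/fs/ext4/<disk>/mb_group_prealloc.
 */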
3066 static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3067 {
3068 struct super_block *sb = ac->ac_sb;
3069 struct ext4_locality_group *lg = ac->ac_lg;
3070
3071 BUG_ON(lg == NULL);
3072 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3073 mb_debug(1, "#%u: goal %u blocks for locality group\n",
3074 current->pid, ac->ac_g_ex.fe_len);
3075 }
3076
3077
3078
3079
3080
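/*
 * Normalization means making request better in terms of
 * size and alignment
 */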
3081 static noinline_for_stack void
3082 ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3083 struct ext4_allocation_request *ar)
3084 {
3085 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3086 int bsbits, max;
3087 ext4_lblk_t end;
3088 loff_t size, start_off;
3089 loff_t orig_size __maybe_unused;
3090 ext4_lblk_t start;
3091 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3092 struct ext4_prealloc_space *pa;
3093
3094 /* do normalize only data requests, metadata requests
3095    do not need preallocation */
3096 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3097 return;
3098
3099 /* sometime caller may want exact blocks */
3100 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3101 return;
3102
3103 /* caller may indicate that preallocation isn't
3104    required (it's a tail, for example) */
3105 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
3106 return;
3107
3108 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
3109 ext4_mb_normalize_group_request(ac);
3110 return;
3111 }
3112
3113 bsbits = ac->ac_sb->s_blocksize_bits;
3114
3115 /* first, let's learn actual file size
3116    given current request is allocated */
3117 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3118 size = size << bsbits;
3119 if (size < i_size_read(ac->ac_inode))
3120 size = i_size_read(ac->ac_inode);
3121 orig_size = size;
3122
3123 /* max size of free chunks */
3124 max = 2 << bsbits;
3125
3126 #define NRL_CHECK_SIZE(req, size, max, chunk_size) \
3127 (req <= (size) || max <= (chunk_size))
3128
3129 /* first, try to predict filesize */
3130 /* XXX: should this table be tunable? */
3131 start_off = 0;
3132 if (size <= 16 * 1024) {
3133 size = 16 * 1024;
3134 } else if (size <= 32 * 1024) {
3135 size = 32 * 1024;
3136 } else if (size <= 64 * 1024) {
3137 size = 64 * 1024;
3138 } else if (size <= 128 * 1024) {
3139 size = 128 * 1024;
3140 } else if (size <= 256 * 1024) {
3141 size = 256 * 1024;
3142 } else if (size <= 512 * 1024) {
3143 size = 512 * 1024;
3144 } else if (size <= 1024 * 1024) {
3145 size = 1024 * 1024;
3146 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
3147 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3148 (21 - bsbits)) << 21;
3149 size = 2 * 1024 * 1024;
3150 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
3151 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3152 (22 - bsbits)) << 22;
3153 size = 4 * 1024 * 1024;
3154 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
3155 (8<<20)>>bsbits, max, 8 * 1024)) {
3156 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3157 (23 - bsbits)) << 23;
3158 size = 8 * 1024 * 1024;
3159 } else {
3160 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
3161 size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
3162 ac->ac_o_ex.fe_len) << bsbits;
3163 }
3164 size = size >> bsbits;
3165 start = start_off >> bsbits;
3166
3167 /* don't cover already allocated blocks in selected range */
3168 if (ar->pleft && start <= ar->lleft) {
3169 size -= ar->lleft + 1 - start;
3170 start = ar->lleft + 1;
3171 }
3172 if (ar->pright && start + size - 1 >= ar->lright)
3173 size -= start + size - ar->lright;
3174
3175 /*
3176  * Trim allocation request for filesystems with artificially
3177  * small groups.
3178  */
3179 if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
3180 size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
3181
3182 end = start + size;
3183
3184 /* check we don't cross already preallocated blocks */
3185 rcu_read_lock();
3186 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3187 ext4_lblk_t pa_end;
3188
3189 if (pa->pa_deleted)
3190 continue;
3191 spin_lock(&pa->pa_lock);
3192 if (pa->pa_deleted) {
3193 spin_unlock(&pa->pa_lock);
3194 continue;
3195 }
3196
3197 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3198 pa->pa_len);
3199
3200 /* PA must not overlap original request */
3201 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
3202 ac->ac_o_ex.fe_logical < pa->pa_lstart));
3203
3204 /* skip PAs this normalized request doesn't overlap with */
3205 if (pa->pa_lstart >= end || pa_end <= start) {
3206 spin_unlock(&pa->pa_lock);
3207 continue;
3208 }
3209 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
3210
3211 /* adjust start or end to be adjacent to this pa */
3212 if (pa_end <= ac->ac_o_ex.fe_logical) {
3213 BUG_ON(pa_end < start);
3214 start = pa_end;
3215 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
3216 BUG_ON(pa->pa_lstart > end);
3217 end = pa->pa_lstart;
3218 }
3219 spin_unlock(&pa->pa_lock);
3220 }
3221 rcu_read_unlock();
3222 size = end - start;
3223
3224 /* XXX: extra loop to check we really don't overlap preallocations */
3225 rcu_read_lock();
3226 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3227 ext4_lblk_t pa_end;
3228
3229 spin_lock(&pa->pa_lock);
3230 if (pa->pa_deleted == 0) {
3231 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3232 pa->pa_len);
3233 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3234 }
3235 spin_unlock(&pa->pa_lock);
3236 }
3237 rcu_read_unlock();
3238
3239 if (start + size <= ac->ac_o_ex.fe_logical &&
3240 start > ac->ac_o_ex.fe_logical) {
3241 ext4_msg(ac->ac_sb, KERN_ERR,
3242 "start %lu, size %lu, fe_logical %lu",
3243 (unsigned long) start, (unsigned long) size,
3244 (unsigned long) ac->ac_o_ex.fe_logical);
3245 BUG();
3246 }
3247 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
3248
3249
3250 /* now prepare goal request */
3251 /* XXX: is it better to align blocks WRT to logical
3252    placement or satisfy big request as is */
3253 ac->ac_g_ex.fe_logical = start;
3254 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
3255
3256 /* define goal start in order to merge */
3257 if (ar->pright && (ar->lright == (start + size))) {
3258 /* merge to the right */
3259 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
3260 &ac->ac_f_ex.fe_group,
3261 &ac->ac_f_ex.fe_start);
3262 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3263 }
3264 if (ar->pleft && (ar->lleft + 1 == start)) {
3265 /* merge to the left */
3266 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
3267 &ac->ac_f_ex.fe_group,
3268 &ac->ac_f_ex.fe_start);
3269 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3270 }
3271
3272 mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
3273 (unsigned) orig_size, (unsigned) start);
3274 }
3275
3276 static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3277 {
3278 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3279
3280 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3281 atomic_inc(&sbi->s_bal_reqs);
3282 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3283 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
3284 atomic_inc(&sbi->s_bal_success);
3285 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3286 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3287 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3288 atomic_inc(&sbi->s_bal_goals);
3289 if (ac->ac_found > sbi->s_mb_max_to_scan)
3290 atomic_inc(&sbi->s_bal_breaks);
3291 }
3292
3293 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3294 trace_ext4_mballoc_alloc(ac);
3295 else
3296 trace_ext4_mballoc_prealloc(ac);
3297 }
3298
3299
3300
3301
3302
3303
3304
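/*
 * Called on allocation failure: give the blocks already marked used
 * (ac->ac_f_ex) back to the buddy, or return the clusters to the
 * inode preallocation they were taken from.
 */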
3305 static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3306 {
3307 struct ext4_prealloc_space *pa = ac->ac_pa;
3308 struct ext4_buddy e4b;
3309 int err;
3310
3311 if (pa == NULL) {
3312 if (ac->ac_f_ex.fe_len == 0)
3313 return;
3314 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
3315 if (err) {
3316 /*
3317  * This should never happen since we pin the
3318  * pages in the ext4_allocation_context so
3319  * ext4_mb_load_buddy() should never fail.
3320  */
3321 WARN(1, "mb_load_buddy failed (%d)", err);
3322 return;
3323 }
3324 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3325 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
3326 ac->ac_f_ex.fe_len);
3327 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3328 ext4_mb_unload_buddy(&e4b);
3329 return;
3330 }
3331 if (pa->pa_type == MB_INODE_PA)
3332 pa->pa_free += ac->ac_b_ex.fe_len;
3333 }
3334
3335 /*
3336  * use blocks preallocated to inode
3337  */
3338 static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3339 struct ext4_prealloc_space *pa)
3340 {
3341 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3342 ext4_fsblk_t start;
3343 ext4_fsblk_t end;
3344 int len;
3345
3346 /* found preallocated blocks, use them */
3347 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3348 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
3349 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
3350 len = EXT4_NUM_B2C(sbi, end - start);
3351 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3352 &ac->ac_b_ex.fe_start);
3353 ac->ac_b_ex.fe_len = len;
3354 ac->ac_status = AC_STATUS_FOUND;
3355 ac->ac_pa = pa;
3356
3357 BUG_ON(start < pa->pa_pstart);
3358 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
3359 BUG_ON(pa->pa_free < len);
3360 pa->pa_free -= len;
3361
3362 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3363 }
3364
3365 /*
3366  * use blocks preallocated to locality group
3367  */
3368 static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3369 struct ext4_prealloc_space *pa)
3370 {
3371 unsigned int len = ac->ac_o_ex.fe_len;
3372
3373 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3374 &ac->ac_b_ex.fe_group,
3375 &ac->ac_b_ex.fe_start);
3376 ac->ac_b_ex.fe_len = len;
3377 ac->ac_status = AC_STATUS_FOUND;
3378 ac->ac_pa = pa;
3379
3380 /*
3381  * pa_pstart, pa_lstart, pa_len and pa_free are not adjusted
3382  * here: ext4_mb_release_context() updates the group pa once
3383  * the actually used length (ac_b_ex.fe_len) is known for
3384  * this allocation.
3385  */
3386 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3387 }
3388
3389
3390
3391
3392
3393
3394
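/*
 * Return the preallocated space with minimal distance from
 * @goal_block; @cpa is the closest candidate found so far, and the
 * pa refcounts are moved accordingly when @pa wins.
 */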
3395 static struct ext4_prealloc_space *
3396 ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3397 struct ext4_prealloc_space *pa,
3398 struct ext4_prealloc_space *cpa)
3399 {
3400 ext4_fsblk_t cur_distance, new_distance;
3401
3402 if (cpa == NULL) {
3403 atomic_inc(&pa->pa_count);
3404 return pa;
3405 }
3406 cur_distance = abs(goal_block - cpa->pa_pstart);
3407 new_distance = abs(goal_block - pa->pa_pstart);
3408
3409 if (cur_distance <= new_distance)
3410 return cpa;
3411
3412
3413 atomic_dec(&cpa->pa_count);
3414 atomic_inc(&pa->pa_count);
3415 return pa;
3416 }
3417
3418
3419
3420
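/*
 * Try to satisfy the request from existing preallocations: first the
 * per-inode pa list, then (for group allocations) the locality group
 * order buckets; returns 1 when preallocated space was used.
 */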
3421 static noinline_for_stack int
3422 ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3423 {
3424 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3425 int order, i;
3426 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3427 struct ext4_locality_group *lg;
3428 struct ext4_prealloc_space *pa, *cpa = NULL;
3429 ext4_fsblk_t goal_block;
3430
3431 /* only data can be preallocated */
3432 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3433 return 0;
3434
3435 /* first, try per-file preallocation */
3436 rcu_read_lock();
3437 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3438
3439 /* all fields in this condition don't change,
3440    so we can skip locking for them */
3441 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3442 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
3443 EXT4_C2B(sbi, pa->pa_len)))
3444 continue;
3445
3446 /* non-extent files can't have physical blocks past 2^32 */
3447 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3448 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
3449 EXT4_MAX_BLOCK_FILE_PHYS))
3450 continue;
3451
3452 /* found preallocated blocks, use them */
3453 spin_lock(&pa->pa_lock);
3454 if (pa->pa_deleted == 0 && pa->pa_free) {
3455 atomic_inc(&pa->pa_count);
3456 ext4_mb_use_inode_pa(ac, pa);
3457 spin_unlock(&pa->pa_lock);
3458 ac->ac_criteria = 10;
3459 rcu_read_unlock();
3460 return 1;
3461 }
3462 spin_unlock(&pa->pa_lock);
3463 }
3464 rcu_read_unlock();
3465
3466 /* can we use group allocation? */
3467 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
3468 return 0;
3469
3470 /* inode may have no locality group for some reason */
3471 lg = ac->ac_lg;
3472 if (lg == NULL)
3473 return 0;
3474 order = fls(ac->ac_o_ex.fe_len) - 1;
3475 if (order > PREALLOC_TB_SIZE - 1)
3476 /* The max size of hash table is PREALLOC_TB_SIZE */
3477 order = PREALLOC_TB_SIZE - 1;
3478
3479 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3480
3481 /* search for the prealloc space that is having
3482  * minimal distance from the goal block.
3483  */
3484 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3485 rcu_read_lock();
3486 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3487 pa_inode_list) {
3488 spin_lock(&pa->pa_lock);
3489 if (pa->pa_deleted == 0 &&
3490 pa->pa_free >= ac->ac_o_ex.fe_len) {
3491
3492 cpa = ext4_mb_check_group_pa(goal_block,
3493 pa, cpa);
3494 }
3495 spin_unlock(&pa->pa_lock);
3496 }
3497 rcu_read_unlock();
3498 }
3499 if (cpa) {
3500 ext4_mb_use_group_pa(ac, cpa);
3501 ac->ac_criteria = 20;
3502 return 1;
3503 }
3504 return 0;
3505 }
3506
3507
3508
3509
3510
3511
3512
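/*
 * Goes through all the blocks freed in the group but not yet
 * committed and marks them used in the in-core bitmap, so they are
 * not handed out again before the freeing transaction commits.
 */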
3513 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3514 ext4_group_t group)
3515 {
3516 struct rb_node *n;
3517 struct ext4_group_info *grp;
3518 struct ext4_free_data *entry;
3519
3520 grp = ext4_get_group_info(sb, group);
3521 n = rb_first(&(grp->bb_free_root));
3522
3523 while (n) {
3524 entry = rb_entry(n, struct ext4_free_data, efd_node);
3525 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
3526 n = rb_next(n);
3527 }
3528 return;
3529 }
3530
3531
3532
3533
3534
3535
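/*
 * Goes through all the preallocations in the group and marks their
 * clusters used in the in-core bitmap, so buddy data generated from
 * this bitmap never hands out preallocated clusters.
 */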
3536 static noinline_for_stack
3537 void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3538 ext4_group_t group)
3539 {
3540 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3541 struct ext4_prealloc_space *pa;
3542 struct list_head *cur;
3543 ext4_group_t groupnr;
3544 ext4_grpblk_t start;
3545 int preallocated = 0;
3546 int len;
3547
3548 
3549 
3550 /*
3551  * Walk the group's preallocation list and mark every pa's
3552  * clusters used in the in-core bitmap, so that buddy data
3553  * generated from this bitmap never hands out preallocated
3554  * clusters.
3555  */
3556 list_for_each(cur, &grp->bb_prealloc_list) {
3557 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3558 spin_lock(&pa->pa_lock);
3559 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3560 &groupnr, &start);
3561 len = pa->pa_len;
3562 spin_unlock(&pa->pa_lock);
3563 if (unlikely(len == 0))
3564 continue;
3565 BUG_ON(groupnr != group);
3566 ext4_set_bits(bitmap, start, len);
3567 preallocated += len;
3568 }
3569 mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
3570 }
3571
3572 static void ext4_mb_pa_callback(struct rcu_head *head)
3573 {
3574 struct ext4_prealloc_space *pa;
3575 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3576
3577 BUG_ON(atomic_read(&pa->pa_count));
3578 BUG_ON(pa->pa_deleted == 0);
3579 kmem_cache_free(ext4_pspace_cachep, pa);
3580 }
3581
3582
3583
3584
3585
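/*
 * Drop a reference to the preallocated space: the last dropper of a
 * fully consumed pa unlinks it from the group and its owning object
 * and frees it after an RCU grace period.
 */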
3586 static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3587 struct super_block *sb, struct ext4_prealloc_space *pa)
3588 {
3589 ext4_group_t grp;
3590 ext4_fsblk_t grp_blk;
3591
3592 /* in this short window concurrent discard can set pa_deleted */
3593 spin_lock(&pa->pa_lock);
3594 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
3595 spin_unlock(&pa->pa_lock);
3596 return;
3597 }
3598
3599 if (pa->pa_deleted == 1) {
3600 spin_unlock(&pa->pa_lock);
3601 return;
3602 }
3603
3604 pa->pa_deleted = 1;
3605 spin_unlock(&pa->pa_lock);
3606
3607 grp_blk = pa->pa_pstart;
3608
3609 /* If doing group-based preallocation, pa_pstart
3610  * may be in the next group when pa is used up
3611  */
3612 if (pa->pa_type == MB_GROUP_PA)
3613 grp_blk--;
3614
3615 grp = ext4_get_group_number(sb, grp_blk);
3616 /*
3617  * possible race:
3618  *
3619  *  P1 (buddy init)                     P2 (regular allocation)
3620  *                                      find block B in PA
3621  *  copy on-disk bitmap to buddy
3622  *                                      mark B in on-disk bitmap
3623  *                                      drop PA from group
3624  *  mark all PAs in buddy
3625  *
3626  * thus, P1 initializes buddy with B available. to prevent this
3627  * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
3628  * against that pair
3629  */
3630 
3631 ext4_lock_group(sb, grp);
3632 list_del(&pa->pa_group_list);
3633 ext4_unlock_group(sb, grp);
3634
3635 spin_lock(pa->pa_obj_lock);
3636 list_del_rcu(&pa->pa_inode_list);
3637 spin_unlock(pa->pa_obj_lock);
3638
3639 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3640 }
3641
3642
3643
3644
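/*
 * creates new preallocated space for given inode
 */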
3645 static noinline_for_stack int
3646 ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3647 {
3648 struct super_block *sb = ac->ac_sb;
3649 struct ext4_sb_info *sbi = EXT4_SB(sb);
3650 struct ext4_prealloc_space *pa;
3651 struct ext4_group_info *grp;
3652 struct ext4_inode_info *ei;
3653
3654 /* preallocate only when found space is larger than requested */
3655 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3656 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3657 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3658
3659 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3660 if (pa == NULL)
3661 return -ENOMEM;
3662
3663 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
3664 int winl;
3665 int wins;
3666 int win;
3667 int offs;
3668
3669 /* we can't allocate as much as normalizer wants.
3670  * so, found space must get proper lstart
3671  * to cover original request */
3672 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
3673 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
3674
3675 /* we're limited by original request in that
3676  * logical block must be covered any way
3677  * winl is window we can move our chunk within */
3678 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3679
3680 /* also, we should cover whole original request */
3681 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
3682
3683 /* the smallest one defines real window */
3684 win = min(winl, wins);
3685
3686 offs = ac->ac_o_ex.fe_logical %
3687 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3688 if (offs && offs < win)
3689 win = offs;
3690
3691 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
3692 EXT4_NUM_B2C(sbi, win);
3693 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3694 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3695 }
3696
3697 /* preallocation can change ac_b_ex, thus we store
3698    actually allocated blocks for history */
3699 ac->ac_f_ex = ac->ac_b_ex;
3700
3701 pa->pa_lstart = ac->ac_b_ex.fe_logical;
3702 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3703 pa->pa_len = ac->ac_b_ex.fe_len;
3704 pa->pa_free = pa->pa_len;
3705 atomic_set(&pa->pa_count, 1);
3706 spin_lock_init(&pa->pa_lock);
3707 INIT_LIST_HEAD(&pa->pa_inode_list);
3708 INIT_LIST_HEAD(&pa->pa_group_list);
3709 pa->pa_deleted = 0;
3710 pa->pa_type = MB_INODE_PA;
3711
3712 mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
3713 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3714 trace_ext4_mb_new_inode_pa(ac, pa);
3715
3716 ext4_mb_use_inode_pa(ac, pa);
3717 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
3718
3719 ei = EXT4_I(ac->ac_inode);
3720 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3721
3722 pa->pa_obj_lock = &ei->i_prealloc_lock;
3723 pa->pa_inode = ac->ac_inode;
3724
3725 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3726 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3727 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3728
3729 spin_lock(pa->pa_obj_lock);
3730 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
3731 spin_unlock(pa->pa_obj_lock);
3732
3733 return 0;
3734 }
3735
3736
3737
3738
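/*
 * creates new preallocated space for the locality group the
 * allocation context belongs to
 */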
3739 static noinline_for_stack int
3740 ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3741 {
3742 struct super_block *sb = ac->ac_sb;
3743 struct ext4_locality_group *lg;
3744 struct ext4_prealloc_space *pa;
3745 struct ext4_group_info *grp;
3746
3747 /* preallocate only when found space is larger than requested */
3748 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3749 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3750 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3751
3752 BUG_ON(ext4_pspace_cachep == NULL);
3753 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3754 if (pa == NULL)
3755 return -ENOMEM;
3756
3757 /* preallocation can change ac_b_ex, thus we store
3758    actually allocated blocks for history */
3759 ac->ac_f_ex = ac->ac_b_ex;
3760
3761 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3762 pa->pa_lstart = pa->pa_pstart;
3763 pa->pa_len = ac->ac_b_ex.fe_len;
3764 pa->pa_free = pa->pa_len;
3765 atomic_set(&pa->pa_count, 1);
3766 spin_lock_init(&pa->pa_lock);
3767 INIT_LIST_HEAD(&pa->pa_inode_list);
3768 INIT_LIST_HEAD(&pa->pa_group_list);
3769 pa->pa_deleted = 0;
3770 pa->pa_type = MB_GROUP_PA;
3771
3772 mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
3773 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3774 trace_ext4_mb_new_group_pa(ac, pa);
3775
3776 ext4_mb_use_group_pa(ac, pa);
3777 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3778
3779 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3780 lg = ac->ac_lg;
3781 BUG_ON(lg == NULL);
3782
3783 pa->pa_obj_lock = &lg->lg_prealloc_lock;
3784 pa->pa_inode = NULL;
3785
3786 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3787 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3788 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3789
3790 /*
3791  * We will later add the new pa to the right bucket
3792  * after updating the pa_free in ext4_mb_release_context
3793  */
3794 return 0;
3795 }
3796
3797 static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3798 {
3799 int err;
3800
3801 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3802 err = ext4_mb_new_group_pa(ac);
3803 else
3804 err = ext4_mb_new_inode_pa(ac);
3805 return err;
3806 }
3807
3808
3809
3810
3811
3812
3813
3814
3815
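/*
 * Release the unused part of a deleted inode pa back to the buddy:
 * the on-disk bitmap tells which of its clusters are still free, and
 * the freed count is cross-checked against pa_free.
 */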
3816 static noinline_for_stack int
3817 ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3818 struct ext4_prealloc_space *pa)
3819 {
3820 struct super_block *sb = e4b->bd_sb;
3821 struct ext4_sb_info *sbi = EXT4_SB(sb);
3822 unsigned int end;
3823 unsigned int next;
3824 ext4_group_t group;
3825 ext4_grpblk_t bit;
3826 unsigned long long grp_blk_start;
3827 int free = 0;
3828
3829 BUG_ON(pa->pa_deleted == 0);
3830 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3831 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
3832 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3833 end = bit + pa->pa_len;
3834
3835 while (bit < end) {
3836 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3837 if (bit >= end)
3838 break;
3839 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3840 mb_debug(1, " free preallocated %u/%u in group %u\n",
3841 (unsigned) ext4_group_first_block_no(sb, group) + bit,
3842 (unsigned) next - bit, (unsigned) group);
3843 free += next - bit;
3844
3845 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3846 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
3847 EXT4_C2B(sbi, bit)),
3848 next - bit);
3849 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3850 bit = next + 1;
3851 }
3852 if (free != pa->pa_free) {
3853 ext4_msg(e4b->bd_sb, KERN_CRIT,
3854 "pa %p: logic %lu, phys. %lu, len %lu",
3855 pa, (unsigned long) pa->pa_lstart,
3856 (unsigned long) pa->pa_pstart,
3857 (unsigned long) pa->pa_len);
3858 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3859 free, pa->pa_free);
3860 /*
3861  * pa is already deleted so we use the value obtained
3862  * from the bitmap and continue.
3863  */
3864 }
3865 atomic_add(free, &sbi->s_mb_discarded);
3866
3867 return 0;
3868 }
3869
3870 static noinline_for_stack int
3871 ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3872 struct ext4_prealloc_space *pa)
3873 {
3874 struct super_block *sb = e4b->bd_sb;
3875 ext4_group_t group;
3876 ext4_grpblk_t bit;
3877
3878 trace_ext4_mb_release_group_pa(sb, pa);
3879 BUG_ON(pa->pa_deleted == 0);
3880 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3881 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3882 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3883 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3884 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3885
3886 return 0;
3887 }
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
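/*
 * Discard unused preallocations in @group to regain at least @needed
 * clusters (all of them when @needed is 0); pa's that are busy in a
 * running allocation are skipped and retried.
 */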
3898 static noinline_for_stack int
3899 ext4_mb_discard_group_preallocations(struct super_block *sb,
3900 ext4_group_t group, int needed)
3901 {
3902 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3903 struct buffer_head *bitmap_bh = NULL;
3904 struct ext4_prealloc_space *pa, *tmp;
3905 struct list_head list;
3906 struct ext4_buddy e4b;
3907 int err;
3908 int busy = 0;
3909 int free = 0;
3910
3911 mb_debug(1, "discard preallocation for group %u\n", group);
3912
3913 if (list_empty(&grp->bb_prealloc_list))
3914 return 0;
3915
3916 bitmap_bh = ext4_read_block_bitmap(sb, group);
3917 if (IS_ERR(bitmap_bh)) {
3918 err = PTR_ERR(bitmap_bh);
3919 ext4_error(sb, "Error %d reading block bitmap for %u",
3920 err, group);
3921 return 0;
3922 }
3923
3924 err = ext4_mb_load_buddy(sb, group, &e4b);
3925 if (err) {
3926 ext4_warning(sb, "Error %d loading buddy information for %u",
3927 err, group);
3928 put_bh(bitmap_bh);
3929 return 0;
3930 }
3931
3932 if (needed == 0)
3933 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
3934
3935 INIT_LIST_HEAD(&list);
3936 repeat:
3937 ext4_lock_group(sb, group);
3938 list_for_each_entry_safe(pa, tmp,
3939 &grp->bb_prealloc_list, pa_group_list) {
3940 spin_lock(&pa->pa_lock);
3941 if (atomic_read(&pa->pa_count)) {
3942 spin_unlock(&pa->pa_lock);
3943 busy = 1;
3944 continue;
3945 }
3946 if (pa->pa_deleted) {
3947 spin_unlock(&pa->pa_lock);
3948 continue;
3949 }
3950 
3951 /* seems this one can be freed ... */
3952 pa->pa_deleted = 1;
3953 
3954 /* we can trust pa_free ... */
3955 free += pa->pa_free;
3956
3957 spin_unlock(&pa->pa_lock);
3958
3959 list_del(&pa->pa_group_list);
3960 list_add(&pa->u.pa_tmp_list, &list);
3961 }
3962 
3963 /* if we still need more blocks and some PAs were used, try again */
3964 if (free < needed && busy) {
3965 busy = 0;
3966 ext4_unlock_group(sb, group);
3967 cond_resched();
3968 goto repeat;
3969 }
3970
3971 /* found anything to free? */
3972 if (list_empty(&list)) {
3973 BUG_ON(free != 0);
3974 goto out;
3975 }
3976
3977 /* now free all selected PAs */
3978 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3979 
3980 /* remove from object (inode or locality group) */
3981 spin_lock(pa->pa_obj_lock);
3982 list_del_rcu(&pa->pa_inode_list);
3983 spin_unlock(pa->pa_obj_lock);
3984
3985 if (pa->pa_type == MB_GROUP_PA)
3986 ext4_mb_release_group_pa(&e4b, pa);
3987 else
3988 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3989
3990 list_del(&pa->u.pa_tmp_list);
3991 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3992 }
3993
3994 out:
3995 ext4_unlock_group(sb, group);
3996 ext4_mb_unload_buddy(&e4b);
3997 put_bh(bitmap_bh);
3998 return free;
3999 }
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
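/*
 * releases all non-used preallocated blocks for given inode
 */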
4010 void ext4_discard_preallocations(struct inode *inode)
4011 {
4012 struct ext4_inode_info *ei = EXT4_I(inode);
4013 struct super_block *sb = inode->i_sb;
4014 struct buffer_head *bitmap_bh = NULL;
4015 struct ext4_prealloc_space *pa, *tmp;
4016 ext4_group_t group = 0;
4017 struct list_head list;
4018 struct ext4_buddy e4b;
4019 int err;
4020
4021 if (!S_ISREG(inode->i_mode)) {
4022
4023 return;
4024 }
4025
4026 mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
4027 trace_ext4_discard_preallocations(inode);
4028
4029 INIT_LIST_HEAD(&list);
4030
4031 repeat:
4032 /* first, collect all pa's in the inode */
4033 spin_lock(&ei->i_prealloc_lock);
4034 while (!list_empty(&ei->i_prealloc_list)) {
4035 pa = list_entry(ei->i_prealloc_list.next,
4036 struct ext4_prealloc_space, pa_inode_list);
4037 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
4038 spin_lock(&pa->pa_lock);
4039 if (atomic_read(&pa->pa_count)) {
4040 /* this shouldn't happen often - nobody should
4041    use preallocation while we're discarding it */
4042 spin_unlock(&pa->pa_lock);
4043 spin_unlock(&ei->i_prealloc_lock);
4044 ext4_msg(sb, KERN_ERR,
4045 "uh-oh! used pa while discarding");
4046 WARN_ON(1);
4047 schedule_timeout_uninterruptible(HZ);
4048 goto repeat;
4049
4050 }
4051 if (pa->pa_deleted == 0) {
4052 pa->pa_deleted = 1;
4053 spin_unlock(&pa->pa_lock);
4054 list_del_rcu(&pa->pa_inode_list);
4055 list_add(&pa->u.pa_tmp_list, &list);
4056 continue;
4057 }
4058
4059 /* someone is deleting pa right now */
4060 spin_unlock(&pa->pa_lock);
4061 spin_unlock(&ei->i_prealloc_lock);
4062
4063 /* we have to wait here because pa_deleted
4064  * doesn't mean pa is already unlinked from
4065  * the list. as we might be called from
4066  * ->clear_inode() the inode will get freed
4067  * and concurrent thread which is unlinking
4068  * pa from inode's list may access already
4069  * freed memory, bad-bad-bad */
4070 
4071 /* XXX: if this happens too often, we can
4072  * add a flag to force wait only in case
4073  * of ->clear_inode(), but not in case of
4074  * regular truncate */
4075 schedule_timeout_uninterruptible(HZ);
4076 goto repeat;
4077 }
4078 spin_unlock(&ei->i_prealloc_lock);
4079
4080 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4081 BUG_ON(pa->pa_type != MB_INODE_PA);
4082 group = ext4_get_group_number(sb, pa->pa_pstart);
4083
4084 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
4085 GFP_NOFS|__GFP_NOFAIL);
4086 if (err) {
4087 ext4_error(sb, "Error %d loading buddy information for %u",
4088 err, group);
4089 continue;
4090 }
4091
4092 bitmap_bh = ext4_read_block_bitmap(sb, group);
4093 if (IS_ERR(bitmap_bh)) {
4094 err = PTR_ERR(bitmap_bh);
4095 ext4_error(sb, "Error %d reading block bitmap for %u",
4096 err, group);
4097 ext4_mb_unload_buddy(&e4b);
4098 continue;
4099 }
4100
4101 ext4_lock_group(sb, group);
4102 list_del(&pa->pa_group_list);
4103 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
4104 ext4_unlock_group(sb, group);
4105
4106 ext4_mb_unload_buddy(&e4b);
4107 put_bh(bitmap_bh);
4108
4109 list_del(&pa->u.pa_tmp_list);
4110 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4111 }
4112 }
4113
4114 #ifdef CONFIG_EXT4_DEBUG
4115 static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4116 {
4117 struct super_block *sb = ac->ac_sb;
4118 ext4_group_t ngroups, i;
4119
4120 if (!ext4_mballoc_debug ||
4121 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
4122 return;
4123
4124 ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
4125 " Allocation context details:");
4126 ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
4127 ac->ac_status, ac->ac_flags);
4128 ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
4129 "goal %lu/%lu/%lu@%lu, "
4130 "best %lu/%lu/%lu@%lu cr %d",
4131 (unsigned long)ac->ac_o_ex.fe_group,
4132 (unsigned long)ac->ac_o_ex.fe_start,
4133 (unsigned long)ac->ac_o_ex.fe_len,
4134 (unsigned long)ac->ac_o_ex.fe_logical,
4135 (unsigned long)ac->ac_g_ex.fe_group,
4136 (unsigned long)ac->ac_g_ex.fe_start,
4137 (unsigned long)ac->ac_g_ex.fe_len,
4138 (unsigned long)ac->ac_g_ex.fe_logical,
4139 (unsigned long)ac->ac_b_ex.fe_group,
4140 (unsigned long)ac->ac_b_ex.fe_start,
4141 (unsigned long)ac->ac_b_ex.fe_len,
4142 (unsigned long)ac->ac_b_ex.fe_logical,
4143 (int)ac->ac_criteria);
4144 ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found);
4145 ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
4146 ngroups = ext4_get_groups_count(sb);
4147 for (i = 0; i < ngroups; i++) {
4148 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
4149 struct ext4_prealloc_space *pa;
4150 ext4_grpblk_t start;
4151 struct list_head *cur;
4152 ext4_lock_group(sb, i);
4153 list_for_each(cur, &grp->bb_prealloc_list) {
4154 pa = list_entry(cur, struct ext4_prealloc_space,
4155 pa_group_list);
4156 spin_lock(&pa->pa_lock);
4157 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
4158 NULL, &start);
4159 spin_unlock(&pa->pa_lock);
4160 printk(KERN_ERR "PA:%u:%d:%u \n", i,
4161 start, pa->pa_len);
4162 }
4163 ext4_unlock_group(sb, i);
4164
4165 if (grp->bb_free == 0)
4166 continue;
4167 printk(KERN_ERR "%u: %d/%d \n",
4168 i, grp->bb_free, grp->bb_fragments);
4169 }
4170 printk(KERN_ERR "\n");
4171 }
4172 #else
4173 static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4174 {
4175 return;
4176 }
4177 #endif
4178
4179
4180
4181
4182
4183
4184
4185
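/*
 * Pick the allocation policy: files fully written and closed get no
 * preallocation, streams larger than s_mb_stream_request use
 * per-inode preallocation, and small files go through the per-CPU
 * locality group.
 */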
4186 static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4187 {
4188 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4189 int bsbits = ac->ac_sb->s_blocksize_bits;
4190 loff_t size, isize;
4191
4192 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
4193 return;
4194
4195 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
4196 return;
4197
4198 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
4199 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
4200 >> bsbits;
4201
4202 if ((size == isize) && !ext4_fs_is_busy(sbi) &&
4203 !inode_is_open_for_write(ac->ac_inode)) {
4204 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
4205 return;
4206 }
4207
4208 if (sbi->s_mb_group_prealloc <= 0) {
4209 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4210 return;
4211 }
4212
4213 /* don't use group allocation for large files */
4214 size = max(size, isize);
4215 if (size > sbi->s_mb_stream_request) {
4216 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4217 return;
4218 }
4219
4220 BUG_ON(ac->ac_lg != NULL);
4221
4222 /*
4223  * locality group prealloc space is per-CPU. Keeping it per CPU
4224  * reduces contention between block requests from multiple CPUs.
4225  */
4226 ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);
4227
4228 /* we're going to use group allocation */
4229 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
4230
4231 /* serialize all allocations in the group */
4232 mutex_lock(&ac->ac_lg->lg_mutex);
4233 }
4234
4235 static noinline_for_stack int
4236 ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4237 struct ext4_allocation_request *ar)
4238 {
4239 struct super_block *sb = ar->inode->i_sb;
4240 struct ext4_sb_info *sbi = EXT4_SB(sb);
4241 struct ext4_super_block *es = sbi->s_es;
4242 ext4_group_t group;
4243 unsigned int len;
4244 ext4_fsblk_t goal;
4245 ext4_grpblk_t block;
4246
4247 /* we can't allocate > group size */
4248 len = ar->len;
4249
4250 /* just a dirty hack to filter too big requests */
4251 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
4252 len = EXT4_CLUSTERS_PER_GROUP(sb);
4253
4254 /* start searching from the goal */
4255 goal = ar->goal;
4256 if (goal < le32_to_cpu(es->s_first_data_block) ||
4257 goal >= ext4_blocks_count(es))
4258 goal = le32_to_cpu(es->s_first_data_block);
4259 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4260
4261 /* set up allocation goals */
4262 ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
4263 ac->ac_status = AC_STATUS_CONTINUE;
4264 ac->ac_sb = sb;
4265 ac->ac_inode = ar->inode;
4266 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
4267 ac->ac_o_ex.fe_group = group;
4268 ac->ac_o_ex.fe_start = block;
4269 ac->ac_o_ex.fe_len = len;
4270 ac->ac_g_ex = ac->ac_o_ex;
4271 ac->ac_flags = ar->flags;
4272
4273 /* we have to define context: we'll work with a file or
4274    locality group. this is a policy, actually */
4275 ext4_mb_group_or_file(ac);
4276
4277 mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4278 "left: %u/%u, right %u/%u to %swritable\n",
4279 (unsigned) ar->len, (unsigned) ar->logical,
4280 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4281 (unsigned) ar->lleft, (unsigned) ar->pleft,
4282 (unsigned) ar->lright, (unsigned) ar->pright,
4283 inode_is_open_for_write(ar->inode) ? "" : "non-");
4284 return 0;
4285
4286 }
4287
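/*
 * Discard unused preallocations from one order bucket of a locality
 * group so the list stays short; pa's currently in use are left
 * alone.
 */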
4288 static noinline_for_stack void
4289 ext4_mb_discard_lg_preallocations(struct super_block *sb,
4290 struct ext4_locality_group *lg,
4291 int order, int total_entries)
4292 {
4293 ext4_group_t group = 0;
4294 struct ext4_buddy e4b;
4295 struct list_head discard_list;
4296 struct ext4_prealloc_space *pa, *tmp;
4297
4298 mb_debug(1, "discard locality group preallocation\n");
4299
4300 INIT_LIST_HEAD(&discard_list);
4301
4302 spin_lock(&lg->lg_prealloc_lock);
4303 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4304 pa_inode_list) {
4305 spin_lock(&pa->pa_lock);
4306 if (atomic_read(&pa->pa_count)) {
4307 /*
4308  * This is the pa that we just used
4309  * for block allocation. So don't
4310  * free that
4311  */
4312 spin_unlock(&pa->pa_lock);
4313 continue;
4314 }
4315 if (pa->pa_deleted) {
4316 spin_unlock(&pa->pa_lock);
4317 continue;
4318 }
4319
4320 BUG_ON(pa->pa_type != MB_GROUP_PA);
4321
4322 /* seems this one can be freed ... */
4323 pa->pa_deleted = 1;
4324 spin_unlock(&pa->pa_lock);
4325
4326 list_del_rcu(&pa->pa_inode_list);
4327 list_add(&pa->u.pa_tmp_list, &discard_list);
4328
4329 total_entries--;
4330 if (total_entries <= 5) {
4331 
4332 /*
4333  * stop scanning once no more than five entries would remain
4334  * on this order list; trimming keeps the list short without
4335  * discarding everything.
4336  */
4337 break;
4338 }
4339 }
4340 spin_unlock(&lg->lg_prealloc_lock);
4341
4342 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4343 int err;
4344
4345 group = ext4_get_group_number(sb, pa->pa_pstart);
4346 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
4347 GFP_NOFS|__GFP_NOFAIL);
4348 if (err) {
4349 ext4_error(sb, "Error %d loading buddy information for %u",
4350 err, group);
4351 continue;
4352 }
4353 ext4_lock_group(sb, group);
4354 list_del(&pa->pa_group_list);
4355 ext4_mb_release_group_pa(&e4b, pa);
4356 ext4_unlock_group(sb, group);
4357
4358 ext4_mb_unload_buddy(&e4b);
4359 list_del(&pa->u.pa_tmp_list);
4360 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4361 }
4362 }
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
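/*
 * Put the just-released group pa back into its locality-group order
 * bucket, keeping the list sorted by pa_free, and trim the bucket
 * when it grows beyond 8 entries.
 */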
4373 static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4374 {
4375 int order, added = 0, lg_prealloc_count = 1;
4376 struct super_block *sb = ac->ac_sb;
4377 struct ext4_locality_group *lg = ac->ac_lg;
4378 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4379
4380 order = fls(pa->pa_free) - 1;
4381 if (order > PREALLOC_TB_SIZE - 1)
4382 /* The max size of hash table is PREALLOC_TB_SIZE */
4383 order = PREALLOC_TB_SIZE - 1;
4384
4385 spin_lock(&lg->lg_prealloc_lock);
4386 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4387 pa_inode_list) {
4388 spin_lock(&tmp_pa->pa_lock);
4389 if (tmp_pa->pa_deleted) {
4390 spin_unlock(&tmp_pa->pa_lock);
4391 continue;
4392 }
4393 if (!added && pa->pa_free < tmp_pa->pa_free) {
4394 /* Add to the tail of the previous entry */
4395 list_add_tail_rcu(&pa->pa_inode_list,
4396 &tmp_pa->pa_inode_list);
4397 added = 1;
4398
4399 /* we want to count the total
4400  * number of entries in the list
4401  */
4402 }
4403 spin_unlock(&tmp_pa->pa_lock);
4404 lg_prealloc_count++;
4405 }
4406 if (!added)
4407 list_add_tail_rcu(&pa->pa_inode_list,
4408 &lg->lg_prealloc_list[order]);
4409 spin_unlock(&lg->lg_prealloc_lock);
4410
4411 /* Now trim the list to be not more than 8 elements */
4412 if (lg_prealloc_count > 8) {
4413 ext4_mb_discard_lg_preallocations(sb, lg,
4414 order, lg_prealloc_count);
4415 return;
4416 }
4417 return;
4418 }
4419
4420
4421
4422
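/*
 * release all resource we used in allocation
 */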
4423 static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4424 {
4425 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4426 struct ext4_prealloc_space *pa = ac->ac_pa;
4427 if (pa) {
4428 if (pa->pa_type == MB_GROUP_PA) {
4429 /* see comment in ext4_mb_use_group_pa() */
4430 spin_lock(&pa->pa_lock);
4431 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4432 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4433 pa->pa_free -= ac->ac_b_ex.fe_len;
4434 pa->pa_len -= ac->ac_b_ex.fe_len;
4435 spin_unlock(&pa->pa_lock);
4436 }
4437 }
4438 if (pa) {
4439 /*
4440  * We want to add the pa to the right bucket.
4441  * Remove it from the list and while adding
4442  * make sure the list to which we are adding
4443  * doesn't grow big.
4444  */
4445 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4446 spin_lock(pa->pa_obj_lock);
4447 list_del_rcu(&pa->pa_inode_list);
4448 spin_unlock(pa->pa_obj_lock);
4449 ext4_mb_add_n_trim(ac);
4450 }
4451 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4452 }
4453 if (ac->ac_bitmap_page)
4454 put_page(ac->ac_bitmap_page);
4455 if (ac->ac_buddy_page)
4456 put_page(ac->ac_buddy_page);
4457 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4458 mutex_unlock(&ac->ac_lg->lg_mutex);
4459 ext4_mb_collect_stats(ac);
4460 return 0;
4461 }
4462
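/*
 * Walk all groups discarding their preallocations until at least
 * @needed clusters are freed; returns the number actually freed.
 */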
4463 static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4464 {
4465 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4466 int ret;
4467 int freed = 0;
4468
4469 trace_ext4_mb_discard_preallocations(sb, needed);
4470 for (i = 0; i < ngroups && needed > 0; i++) {
4471 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4472 freed += ret;
4473 needed -= ret;
4474 }
4475
4476 return freed;
4477 }
4478
4479
4480
4481
4482
4483
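/*
 * Main entry point into mballoc to allocate blocks: claims free
 * clusters and quota, tries preallocated space first, falls back to
 * the regular allocator and, on ENOSPC, retries after discarding
 * preallocations.
 */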
4484 ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4485 struct ext4_allocation_request *ar, int *errp)
4486 {
4487 int freed;
4488 struct ext4_allocation_context *ac = NULL;
4489 struct ext4_sb_info *sbi;
4490 struct super_block *sb;
4491 ext4_fsblk_t block = 0;
4492 unsigned int inquota = 0;
4493 unsigned int reserv_clstrs = 0;
4494
4495 might_sleep();
4496 sb = ar->inode->i_sb;
4497 sbi = EXT4_SB(sb);
4498
4499 trace_ext4_request_blocks(ar);
4500
4501 /* Allow to use superuser reservation for quota file */
4502 if (ext4_is_quota_file(ar->inode))
4503 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4504
4505 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
4506 /* Without delayed allocation we need to verify
4507  * there is enough free blocks to do block allocation
4508  * and verify allocation doesn't exceed the quota limits.
4509  */
4510 while (ar->len &&
4511 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4512 
4513 /* let others free the space */
4514 cond_resched();
4515 ar->len = ar->len >> 1;
4516 }
4517 if (!ar->len) {
4518 *errp = -ENOSPC;
4519 return 0;
4520 }
4521 reserv_clstrs = ar->len;
4522 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4523 dquot_alloc_block_nofail(ar->inode,
4524 EXT4_C2B(sbi, ar->len));
4525 } else {
4526 while (ar->len &&
4527 dquot_alloc_block(ar->inode,
4528 EXT4_C2B(sbi, ar->len))) {
4529
4530 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4531 ar->len--;
4532 }
4533 }
4534 inquota = ar->len;
4535 if (ar->len == 0) {
4536 *errp = -EDQUOT;
4537 goto out;
4538 }
4539 }
4540
4541 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
4542 if (!ac) {
4543 ar->len = 0;
4544 *errp = -ENOMEM;
4545 goto out;
4546 }
4547
4548 *errp = ext4_mb_initialize_context(ac, ar);
4549 if (*errp) {
4550 ar->len = 0;
4551 goto out;
4552 }
4553
4554 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4555 if (!ext4_mb_use_preallocated(ac)) {
4556 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4557 ext4_mb_normalize_request(ac, ar);
4558 repeat:
4559 /* allocate space in core */
4560 *errp = ext4_mb_regular_allocator(ac);
4561 if (*errp)
4562 goto discard_and_exit;
4563
4564 /* as we've just preallocated more space than
4565  * user requested originally, we store allocated
4566  * space in a special descriptor */
4567 if (ac->ac_status == AC_STATUS_FOUND &&
4568 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4569 *errp = ext4_mb_new_preallocation(ac);
4570 if (*errp) {
4571 discard_and_exit:
4572 ext4_discard_allocated_blocks(ac);
4573 goto errout;
4574 }
4575 }
4576 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4577 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
4578 if (*errp) {
4579 ext4_discard_allocated_blocks(ac);
4580 goto errout;
4581 } else {
4582 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4583 ar->len = ac->ac_b_ex.fe_len;
4584 }
4585 } else {
4586 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4587 if (freed)
4588 goto repeat;
4589 *errp = -ENOSPC;
4590 }
4591
4592 errout:
4593 if (*errp) {
4594 ac->ac_b_ex.fe_len = 0;
4595 ar->len = 0;
4596 ext4_mb_show_ac(ac);
4597 }
4598 ext4_mb_release_context(ac);
4599 out:
4600 if (ac)
4601 kmem_cache_free(ext4_ac_cachep, ac);
4602 if (inquota && ar->len < inquota)
4603 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4604 if (!ar->len) {
4605 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
4606 /* release all the reserved blocks if non delalloc */
4607 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4608 reserv_clstrs);
4609 }
4610
4611 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4612
4613 return block;
4614 }
4615
4616
4617
4618
4619
4620
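/*
 * Two freed extents can be merged only if the physical blocks are
 * contiguous, they were freed by the same transaction and they belong
 * to the same group.
 */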
4621 static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
4622 struct ext4_free_data *entry,
4623 struct ext4_free_data *new_entry,
4624 struct rb_root *entry_rb_root)
4625 {
4626 if ((entry->efd_tid != new_entry->efd_tid) ||
4627 (entry->efd_group != new_entry->efd_group))
4628 return;
4629 if (entry->efd_start_cluster + entry->efd_count ==
4630 new_entry->efd_start_cluster) {
4631 new_entry->efd_start_cluster = entry->efd_start_cluster;
4632 new_entry->efd_count += entry->efd_count;
4633 } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
4634 entry->efd_start_cluster) {
4635 new_entry->efd_count += entry->efd_count;
4636 } else
4637 return;
4638 spin_lock(&sbi->s_md_lock);
4639 list_del(&entry->efd_list);
4640 spin_unlock(&sbi->s_md_lock);
4641 rb_erase(&entry->efd_node, entry_rb_root);
4642 kmem_cache_free(ext4_free_data_cachep, entry);
4643 }
4644
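/*
 * Queue a freed extent on the group's rb tree and the superblock's
 * freed-data list so its clusters are reused only after the freeing
 * transaction commits; adjacent entries are merged on insertion.
 */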
4645 static noinline_for_stack int
4646 ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4647 struct ext4_free_data *new_entry)
4648 {
4649 ext4_group_t group = e4b->bd_group;
4650 ext4_grpblk_t cluster;
4651 ext4_grpblk_t clusters = new_entry->efd_count;
4652 struct ext4_free_data *entry;
4653 struct ext4_group_info *db = e4b->bd_info;
4654 struct super_block *sb = e4b->bd_sb;
4655 struct ext4_sb_info *sbi = EXT4_SB(sb);
4656 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4657 struct rb_node *parent = NULL, *new_node;
4658
4659 BUG_ON(!ext4_handle_valid(handle));
4660 BUG_ON(e4b->bd_bitmap_page == NULL);
4661 BUG_ON(e4b->bd_buddy_page == NULL);
4662
4663 new_node = &new_entry->efd_node;
4664 cluster = new_entry->efd_start_cluster;
4665
4666 if (!*n) {
4667 /* first free block extent. We need to
4668  * protect buddy cache from being freed,
4669  * otherwise we'll refresh it from
4670  * on-disk bitmap and lose not-yet-available
4671  * blocks */
4672 get_page(e4b->bd_buddy_page);
4673 get_page(e4b->bd_bitmap_page);
4674 }
4675 while (*n) {
4676 parent = *n;
4677 entry = rb_entry(parent, struct ext4_free_data, efd_node);
4678 if (cluster < entry->efd_start_cluster)
4679 n = &(*n)->rb_left;
4680 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
4681 n = &(*n)->rb_right;
4682 else {
4683 ext4_grp_locked_error(sb, group, 0,
4684 ext4_group_first_block_no(sb, group) +
4685 EXT4_C2B(sbi, cluster),
4686 "Block already on to-be-freed list");
4687 return 0;
4688 }
4689 }
4690
4691 rb_link_node(new_node, parent, n);
4692 rb_insert_color(new_node, &db->bb_free_root);
4693
4694 /* Now try to see the extent can be merged to left and right */
4695 node = rb_prev(new_node);
4696 if (node) {
4697 entry = rb_entry(node, struct ext4_free_data, efd_node);
4698 ext4_try_merge_freed_extent(sbi, entry, new_entry,
4699 &(db->bb_free_root));
4700 }
4701
4702 node = rb_next(new_node);
4703 if (node) {
4704 entry = rb_entry(node, struct ext4_free_data, efd_node);
4705 ext4_try_merge_freed_extent(sbi, entry, new_entry,
4706 &(db->bb_free_root));
4707 }
4708
4709 spin_lock(&sbi->s_md_lock);
4710 list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
4711 sbi->s_mb_free_pending += clusters;
4712 spin_unlock(&sbi->s_md_lock);
4713 return 0;
4714 }
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
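/**
 * ext4_free_blocks() -- Free given blocks and update quota
 * @handle:	handle for this transaction
 * @inode:	inode
 * @bh:		optional buffer of the block to be freed
 * @block:	starting physical block to be freed
 * @count:	number of blocks to be freed
 * @flags:	flags used by ext4_free_blocks
 */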
4725 void ext4_free_blocks(handle_t *handle, struct inode *inode,
4726 struct buffer_head *bh, ext4_fsblk_t block,
4727 unsigned long count, int flags)
4728 {
4729 struct buffer_head *bitmap_bh = NULL;
4730 struct super_block *sb = inode->i_sb;
4731 struct ext4_group_desc *gdp;
4732 unsigned int overflow;
4733 ext4_grpblk_t bit;
4734 struct buffer_head *gd_bh;
4735 ext4_group_t block_group;
4736 struct ext4_sb_info *sbi;
4737 struct ext4_buddy e4b;
4738 unsigned int count_clusters;
4739 int err = 0;
4740 int ret;
4741
4742 might_sleep();
4743 if (bh) {
4744 if (block)
4745 BUG_ON(block != bh->b_blocknr);
4746 else
4747 block = bh->b_blocknr;
4748 }
4749
4750 sbi = EXT4_SB(sb);
4751 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4752 !ext4_data_block_valid(sbi, block, count)) {
4753 ext4_error(sb, "Freeing blocks not in datazone - "
4754 "block = %llu, count = %lu", block, count);
4755 goto error_return;
4756 }
4757
4758 ext4_debug("freeing block %llu\n", block);
4759 trace_ext4_free_blocks(inode, block, count, flags);
4760
4761 if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
4762 BUG_ON(count > 1);
4763
4764 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4765 inode, bh, block);
4766 }
4767
4768 /*
4769  * If the extent to be freed does not begin on a cluster
4770  * boundary, we need to deal with partial clusters at the
4771  * beginning and end of the extent. Normally we will free
4772  * blocks at the beginning or the end unless we are explicitly
4773  * requested to avoid doing so.
4774  */
4775 overflow = EXT4_PBLK_COFF(sbi, block);
4776 if (overflow) {
4777 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4778 overflow = sbi->s_cluster_ratio - overflow;
4779 block += overflow;
4780 if (count > overflow)
4781 count -= overflow;
4782 else
4783 return;
4784 } else {
4785 block -= overflow;
4786 count += overflow;
4787 }
4788 }
4789 overflow = EXT4_LBLK_COFF(sbi, count);
4790 if (overflow) {
4791 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4792 if (count > overflow)
4793 count -= overflow;
4794 else
4795 return;
4796 } else
4797 count += sbi->s_cluster_ratio - overflow;
4798 }
4799
4800 if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
4801 int i;
4802 int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
4803
4804 for (i = 0; i < count; i++) {
4805 cond_resched();
4806 if (is_metadata)
4807 bh = sb_find_get_block(inode->i_sb, block + i);
4808 ext4_forget(handle, is_metadata, inode, bh, block + i);
4809 }
4810 }
4811
4812 do_more:
4813 overflow = 0;
4814 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4815
4816 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
4817 ext4_get_group_info(sb, block_group))))
4818 return;
4819
4820 /*
4821 * Check to see if we are freeing blocks across a group
4822 * boundary.
4823 */
4824 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4825 overflow = EXT4_C2B(sbi, bit) + count -
4826 EXT4_BLOCKS_PER_GROUP(sb);
4827 count -= overflow;
4828 }
4829 count_clusters = EXT4_NUM_B2C(sbi, count);
4830 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4831 if (IS_ERR(bitmap_bh)) {
4832 err = PTR_ERR(bitmap_bh);
4833 bitmap_bh = NULL;
4834 goto error_return;
4835 }
4836 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4837 if (!gdp) {
4838 err = -EIO;
4839 goto error_return;
4840 }
4841
4842 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4843 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4844 in_range(block, ext4_inode_table(sb, gdp),
4845 sbi->s_itb_per_group) ||
4846 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4847 sbi->s_itb_per_group)) {
4848
4849 ext4_error(sb, "Freeing blocks in system zone - "
4850 "Block = %llu, count = %lu", block, count);
4851 /* err = 0. ext4_std_error should be a no op */
4852 goto error_return;
4853 }
4854
4855 BUFFER_TRACE(bitmap_bh, "getting write access");
4856 err = ext4_journal_get_write_access(handle, bitmap_bh);
4857 if (err)
4858 goto error_return;
4859
4860 /*
4861 * We are about to modify some metadata. Call the journal APIs
4862 * to unshare ->b_data if a currently committing transaction is
4863 * using it
4864 */
4865 BUFFER_TRACE(gd_bh, "get_write_access");
4866 err = ext4_journal_get_write_access(handle, gd_bh);
4867 if (err)
4868 goto error_return;
4869 #ifdef AGGRESSIVE_CHECK
4870 {
4871 int i;
4872 for (i = 0; i < count_clusters; i++)
4873 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4874 }
4875 #endif
4876 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4877
4878 /* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
4879 err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
4880 GFP_NOFS|__GFP_NOFAIL);
4881 if (err)
4882 goto error_return;
4883
4884 /*
4885 * We need to make sure we don't reuse the freed block until after the
4886 * transaction is committed. We make an exception if the inode is to be
4887 * written in writeback mode since writeback mode has weak data
4888 * consistency guarantees.
4889 */
4890 if (ext4_handle_valid(handle) &&
4891 ((flags & EXT4_FREE_BLOCKS_METADATA) ||
4892 !ext4_should_writeback_data(inode))) {
4893 struct ext4_free_data *new_entry;
4894
4895 /* These blocks must not be reused before the transaction commits,
4896 * so queue them on the per-group free-data rbtree; __GFP_NOFAIL
4897 * because ext4_free_blocks() is not allowed to fail here. */
4898 new_entry = kmem_cache_alloc(ext4_free_data_cachep,
4899 GFP_NOFS|__GFP_NOFAIL);
4900 new_entry->efd_start_cluster = bit;
4901 new_entry->efd_group = block_group;
4902 new_entry->efd_count = count_clusters;
4903 new_entry->efd_tid = handle->h_transaction->t_tid;
4904
4905 ext4_lock_group(sb, block_group);
4906 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4907 ext4_mb_free_metadata(handle, &e4b, new_entry);
4908 } else {
4909 /* bb_free and the bitmap need to be updated with the group lock
4910 * held; buddy regeneration looks at them under the same lock, so
4911 * it never sees a half-updated state and double frees are
4912 * avoided */
4913 if (test_opt(sb, DISCARD)) {
4914 err = ext4_issue_discard(sb, block_group, bit, count,
4915 NULL);
4916 if (err && err != -EOPNOTSUPP)
4917 ext4_msg(sb, KERN_WARNING, "discard request in"
4918 " group:%d block:%d count:%lu failed"
4919 " with %d", block_group, bit, count,
4920 err);
4921 } else
4922 EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
4923
4924 ext4_lock_group(sb, block_group);
4925 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4926 mb_free_blocks(inode, &e4b, bit, count_clusters);
4927 }
4928
4929 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
4930 ext4_free_group_clusters_set(sb, gdp, ret);
4931 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
4932 ext4_group_desc_csum_set(sb, block_group, gdp);
4933 ext4_unlock_group(sb, block_group);
4934
4935 if (sbi->s_log_groups_per_flex) {
4936 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4937 atomic64_add(count_clusters,
4938 &sbi_array_rcu_deref(sbi, s_flex_groups,
4939 flex_group)->free_clusters);
4940 }
4941
4942 /*
4943 * When the caller asked to re-reserve the freed cluster (bigalloc
4944 * delalloc bookkeeping), it is not returned to the free pool here,
4945 * so skip the quota release and the free-clusters counter update.
4946 */
4947 if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
4948 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4949 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4950 percpu_counter_add(&sbi->s_freeclusters_counter,
4951 count_clusters);
4952 }
4953
4954 ext4_mb_unload_buddy(&e4b);
4955
4956 /* We dirtied the bitmap block */
4957 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4958 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4959
4960 /* And the group descriptor block */
4961 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4962 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4963 if (!err)
4964 err = ret;
4965
4966 if (overflow && !err) {
4967 block += count;
4968 count = overflow;
4969 put_bh(bitmap_bh);
4970 goto do_more;
4971 }
4972 error_return:
4973 brelse(bitmap_bh);
4974 ext4_std_error(sb, err);
4975 return;
4976 }
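
/*
 * [Editor's sketch, hypothetical wrapper] Typical ways callers drive
 * ext4_free_blocks(): plain data extents need no flags, while a single
 * metadata block passes EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET
 * so the buffer is forgotten and a journal revoke record is written.
 */
static inline void example_free_extents(handle_t *handle, struct inode *inode,
					struct buffer_head *meta_bh,
					ext4_fsblk_t data_block,
					unsigned long data_count)
{
	/* data blocks: freed now or deferred, as the code above decides */
	ext4_free_blocks(handle, inode, NULL, data_block, data_count, 0);

	/* one metadata block (count must be 1 when a bh is passed) */
	ext4_free_blocks(handle, inode, meta_bh, meta_bh->b_blocknr, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}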
4977
4978 /**
4979 * ext4_group_add_blocks() -- Add given blocks to an existing group
4980 * @handle: handle to this transaction
4981 * @sb: super block
4982 * @block: first physical block to add to the block group
4983 * @count: number of blocks to add
4984 *
4985 * This marks the blocks as free in the bitmap and buddy.
4986 */
4987 int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4988 ext4_fsblk_t block, unsigned long count)
4989 {
4990 struct buffer_head *bitmap_bh = NULL;
4991 struct buffer_head *gd_bh;
4992 ext4_group_t block_group;
4993 ext4_grpblk_t bit;
4994 unsigned int i;
4995 struct ext4_group_desc *desc;
4996 struct ext4_sb_info *sbi = EXT4_SB(sb);
4997 struct ext4_buddy e4b;
4998 int err = 0, ret, free_clusters_count;
4999 ext4_grpblk_t clusters_freed;
5000 ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
5001 ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
5002 unsigned long cluster_count = last_cluster - first_cluster + 1;
5003
5004 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
5005
5006 if (count == 0)
5007 return 0;
5008
5009 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
5010
5011 /*
5012 * Check to see if we are adding blocks across a group boundary.
5013 */
5014 if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
5015 ext4_warning(sb, "too many blocks added to group %u",
5016 block_group);
5017 err = -EINVAL;
5018 goto error_return;
5019 }
5020
5021 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
5022 if (IS_ERR(bitmap_bh)) {
5023 err = PTR_ERR(bitmap_bh);
5024 bitmap_bh = NULL;
5025 goto error_return;
5026 }
5027
5028 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
5029 if (!desc) {
5030 err = -EIO;
5031 goto error_return;
5032 }
5033
5034 if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
5035 in_range(ext4_inode_bitmap(sb, desc), block, count) ||
5036 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
5037 in_range(block + count - 1, ext4_inode_table(sb, desc),
5038 sbi->s_itb_per_group)) {
5039 ext4_error(sb, "Adding blocks in system zones - "
5040 "Block = %llu, count = %lu",
5041 block, count);
5042 err = -EINVAL;
5043 goto error_return;
5044 }
5045
5046 BUFFER_TRACE(bitmap_bh, "getting write access");
5047 err = ext4_journal_get_write_access(handle, bitmap_bh);
5048 if (err)
5049 goto error_return;
5050
5051 /*
5052 * We are about to modify some metadata. Call the journal APIs
5053 * to unshare ->b_data if a currently committing transaction is
5054 * using it
5055 */
5056 BUFFER_TRACE(gd_bh, "get_write_access");
5057 err = ext4_journal_get_write_access(handle, gd_bh);
5058 if (err)
5059 goto error_return;
5060
5061 for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
5062 BUFFER_TRACE(bitmap_bh, "clear bit");
5063 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
5064 ext4_error(sb, "bit already cleared for block %llu",
5065 (ext4_fsblk_t)(block + i));
5066 BUFFER_TRACE(bitmap_bh, "bit already cleared");
5067 } else {
5068 clusters_freed++;
5069 }
5070 }
5071
5072 err = ext4_mb_load_buddy(sb, block_group, &e4b);
5073 if (err)
5074 goto error_return;
5075
5076 /* bb_free and the bitmap need to be updated with the group lock
5077 * held; buddy regeneration looks at them under the same lock,
5078 * so it never sees a half-updated state and double accounting
5079 * is avoided
5080 */
5081 ext4_lock_group(sb, block_group);
5082 mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
5083 mb_free_blocks(NULL, &e4b, bit, cluster_count);
5084 free_clusters_count = clusters_freed +
5085 ext4_free_group_clusters(sb, desc);
5086 ext4_free_group_clusters_set(sb, desc, free_clusters_count);
5087 ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
5088 ext4_group_desc_csum_set(sb, block_group, desc);
5089 ext4_unlock_group(sb, block_group);
5090 percpu_counter_add(&sbi->s_freeclusters_counter,
5091 clusters_freed);
5092
5093 if (sbi->s_log_groups_per_flex) {
5094 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
5095 atomic64_add(clusters_freed,
5096 &sbi_array_rcu_deref(sbi, s_flex_groups,
5097 flex_group)->free_clusters);
5098 }
5099
5100 ext4_mb_unload_buddy(&e4b);
5101
5102 /* We dirtied the bitmap block */
5103 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
5104 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
5105
5106 /* And the group descriptor block */
5107 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
5108 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
5109 if (!err)
5110 err = ret;
5111
5112 error_return:
5113 brelse(bitmap_bh);
5114 ext4_std_error(sb, err);
5115 return err;
5116 }
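
/*
 * [Editor's sketch, hypothetical caller] ext4_group_add_blocks() is how
 * the online-resize path hands newly added space to mballoc; the handle
 * must carry journal credits for the bitmap and group-descriptor updates.
 */
static int example_return_new_blocks(handle_t *handle, struct super_block *sb,
				     ext4_fsblk_t first_block,
				     unsigned long nr_blocks)
{
	/* marks the range free in the on-disk bitmap and the buddy cache */
	return ext4_group_add_blocks(handle, sb, first_block, nr_blocks);
}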
5117
5118 /**
5119 * ext4_trim_extent -- TRIM one single free extent in the group
5120 * @sb: super block for the file system
5121 * @start: starting cluster of the free extent in the allocation group
5122 * @count: number of clusters to TRIM
5123 * @group: allocation group we are working with
5124 * @e4b: ext4 buddy for the group
5125 *
5126 * Trim @count clusters starting at @start in @group. To ensure nobody
5127 * can allocate those clusters meanwhile, mark them used in the buddy
5128 * bitmap first. Must be called with the group lock held; the lock is
5129 * dropped around the blocking discard and retaken afterwards. */
5130 static int ext4_trim_extent(struct super_block *sb, int start, int count,
5131 ext4_group_t group, struct ext4_buddy *e4b)
5132 __releases(bitlock)
5133 __acquires(bitlock)
5134 {
5135 struct ext4_free_extent ex;
5136 int ret = 0;
5137
5138 trace_ext4_trim_extent(sb, group, start, count);
5139
5140 assert_spin_locked(ext4_group_lock_ptr(sb, group));
5141
5142 ex.fe_start = start;
5143 ex.fe_group = group;
5144 ex.fe_len = count;
5145
5146 /*
5147 * Mark the clusters used, so no one can reuse them while
5148 * they are being trimmed.
5149 */
5150 mb_mark_used(e4b, &ex);
5151 ext4_unlock_group(sb, group);
5152 ret = ext4_issue_discard(sb, group, start, count, NULL);
5153 ext4_lock_group(sb, group);
5154 mb_free_blocks(NULL, e4b, start, ex.fe_len);
5155 return ret;
5156 }
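
/*
 * [Editor's sketch] ext4_issue_discard() (defined earlier in this file)
 * converts the (group, cluster) pair to a physical block before calling
 * the block layer; the math is the same group-relative addressing used
 * in the error message in ext4_mb_free_metadata() above.
 * example_cluster_to_block() is a hypothetical illustration.
 */
static ext4_fsblk_t example_cluster_to_block(struct super_block *sb,
					     ext4_group_t group,
					     ext4_grpblk_t cluster)
{
	return ext4_group_first_block_no(sb, group) +
	       EXT4_C2B(EXT4_SB(sb), cluster);
}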
5157
5158 /**
5159 * ext4_trim_all_free -- trim all free space in an allocation group
5160 * @sb: super block for the file system
5161 * @group: group to be trimmed
5162 * @start: first group cluster to examine
5163 * @max: last group cluster to examine
5164 * @minblocks: minimum extent cluster count
5165 *
5166 * Walk the group's bitmap searching for free extents. For each free
5167 * extent of at least @minblocks clusters, ext4_trim_extent() marks it
5168 * used in the buddy bitmap so it cannot be allocated meanwhile,
5169 * issues a discard for it, and frees it again in the buddy bitmap.
5170 * This is repeated until the whole range has been scanned, the group
5171 * runs out of candidate free space, or a fatal signal arrives.
5172 *
5173 * Returns the number of clusters trimmed, or a negative error code.
5174 */
5175
5176 static ext4_grpblk_t
5177 ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
5178 ext4_grpblk_t start, ext4_grpblk_t max,
5179 ext4_grpblk_t minblocks)
5180 {
5181 void *bitmap;
5182 ext4_grpblk_t next, count = 0, free_count = 0;
5183 struct ext4_buddy e4b;
5184 int ret = 0;
5185
5186 trace_ext4_trim_all_free(sb, group, start, max);
5187
5188 ret = ext4_mb_load_buddy(sb, group, &e4b);
5189 if (ret) {
5190 ext4_warning(sb, "Error %d loading buddy information for %u",
5191 ret, group);
5192 return ret;
5193 }
5194 bitmap = e4b.bd_bitmap;
5195
5196 ext4_lock_group(sb, group);
5197 if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
5198 minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
5199 goto out;
5200
5201 start = (e4b.bd_info->bb_first_free > start) ?
5202 e4b.bd_info->bb_first_free : start;
5203
5204 while (start <= max) {
5205 start = mb_find_next_zero_bit(bitmap, max + 1, start);
5206 if (start > max)
5207 break;
5208 next = mb_find_next_bit(bitmap, max + 1, start);
5209
5210 if ((next - start) >= minblocks) {
5211 ret = ext4_trim_extent(sb, start,
5212 next - start, group, &e4b);
5213 if (ret && ret != -EOPNOTSUPP)
5214 break;
5215 ret = 0;
5216 count += next - start;
5217 }
5218 free_count += next - start;
5219 start = next + 1;
5220
5221 if (fatal_signal_pending(current)) {
5222 count = -ERESTARTSYS;
5223 break;
5224 }
5225
5226 if (need_resched()) {
5227 ext4_unlock_group(sb, group);
5228 cond_resched();
5229 ext4_lock_group(sb, group);
5230 }
5231
5232 if ((e4b.bd_info->bb_free - free_count) < minblocks)
5233 break;
5234 }
5235
5236 if (!ret) {
5237 ret = count;
5238 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
5239 }
5240 out:
5241 ext4_unlock_group(sb, group);
5242 ext4_mb_unload_buddy(&e4b);
5243
5244 ext4_debug("trimmed %d blocks in the group %d\n",
5245 count, group);
5246
5247 return ret;
5248 }
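
/*
 * [Editor's sketch] The scan above uses the find-next-zero-bit /
 * find-next-bit cursor idiom to enumerate maximal free runs. The same
 * idiom on a raw bitmap, with the generic <linux/bitops.h> primitives
 * instead of the mb_* wrappers (example_longest_free_run is hypothetical):
 */
static unsigned long example_longest_free_run(const unsigned long *bitmap,
					      unsigned long nbits)
{
	unsigned long start = 0, next, best = 0;

	while (start < nbits) {
		start = find_next_zero_bit(bitmap, nbits, start);
		if (start >= nbits)
			break;
		/* [start, next) is one maximal run of zero (free) bits */
		next = find_next_bit(bitmap, nbits, start);
		if (next - start > best)
			best = next - start;
		start = next + 1;
	}
	return best;
}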
5249
5250 /**
5251 * ext4_trim_fs() -- FITRIM ioctl handler
5252 * @sb: superblock for the file system
5253 * @range: fstrim_range structure
5254 *
5255 * range->start is the first byte to trim, range->len the number of
5256 * bytes to trim from there, and range->minlen the minimum extent
5257 * length in bytes. Walk every allocation group that overlaps the
5258 * byte range and invoke ext4_trim_all_free() on each; on return,
5259 * range->len is updated to the number of bytes actually trimmed.
5260 */
5261
5262 int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
5263 {
5264 struct ext4_group_info *grp;
5265 ext4_group_t group, first_group, last_group;
5266 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
5267 uint64_t start, end, minlen, trimmed = 0;
5268 ext4_fsblk_t first_data_blk =
5269 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
5270 ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
5271 int ret = 0;
5272
5273 start = range->start >> sb->s_blocksize_bits;
5274 end = start + (range->len >> sb->s_blocksize_bits) - 1;
5275 minlen = EXT4_NUM_B2C(EXT4_SB(sb),
5276 range->minlen >> sb->s_blocksize_bits);
5277
5278 if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
5279 start >= max_blks ||
5280 range->len < sb->s_blocksize)
5281 return -EINVAL;
5282 if (end >= max_blks)
5283 end = max_blks - 1;
5284 if (end <= first_data_blk)
5285 goto out;
5286 if (start < first_data_blk)
5287 start = first_data_blk;
5288
5289 /* Determine first and last group to examine based on start and end */
5290 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
5291 &first_group, &first_cluster);
5292 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
5293 &last_group, &last_cluster);
5294
5295 /* end now represents the last cluster to discard in this group */
5296 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
5297
5298 for (group = first_group; group <= last_group; group++) {
5299 grp = ext4_get_group_info(sb, group);
5300 /* We only do this if the grp has never been initialized */
5301 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
5302 ret = ext4_mb_init_group(sb, group, GFP_NOFS);
5303 if (ret)
5304 break;
5305 }
5306
5307 /*
5308 * For all groups except the last one, the last cluster to
5309 * examine is always EXT4_CLUSTERS_PER_GROUP(sb) - 1, so we only
5310 * change it for the last group; note that last_cluster was
5311 * already computed above by ext4_get_group_no_and_offset().
5312 */
5313 if (group == last_group)
5314 end = last_cluster;
5315
5316 if (grp->bb_free >= minlen) {
5317 cnt = ext4_trim_all_free(sb, group, first_cluster,
5318 end, minlen);
5319 if (cnt < 0) {
5320 ret = cnt;
5321 break;
5322 }
5323 trimmed += cnt;
5324 }
5325
5326 /*
5327 * For every group except the first one, we are sure
5328 * that the first cluster to discard will be cluster #0.
5329 */
5330 first_cluster = 0;
5331 }
5332
5333 if (!ret)
5334 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
5335
5336 out:
5337 range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
5338 return ret;
5339 }
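
/*
 * [Editor's sketch, hypothetical helper] How the FITRIM ioctl path might
 * drive ext4_trim_fs(): trim every free extent in the file system with no
 * minimum length; start/end are clamped internally as seen above.
 */
static int example_trim_whole_fs(struct super_block *sb)
{
	struct fstrim_range range = {
		.start = 0,
		.len = ULLONG_MAX,	/* clamped to the fs size above */
		.minlen = 0,
	};

	return ext4_trim_fs(sb, &range);
}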
5340
5341
5342 int
5343 ext4_mballoc_query_range(
5344 struct super_block *sb,
5345 ext4_group_t group,
5346 ext4_grpblk_t start,
5347 ext4_grpblk_t end,
5348 ext4_mballoc_query_range_fn formatter,
5349 void *priv)
5350 {
5351 void *bitmap;
5352 ext4_grpblk_t next;
5353 struct ext4_buddy e4b;
5354 int error;
5355
5356 error = ext4_mb_load_buddy(sb, group, &e4b);
5357 if (error)
5358 return error;
5359 bitmap = e4b.bd_bitmap;
5360
5361 ext4_lock_group(sb, group);
5362
5363 start = (e4b.bd_info->bb_first_free > start) ?
5364 e4b.bd_info->bb_first_free : start;
5365 if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
5366 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
5367
5368 while (start <= end) {
5369 start = mb_find_next_zero_bit(bitmap, end + 1, start);
5370 if (start > end)
5371 break;
5372 next = mb_find_next_bit(bitmap, end + 1, start);
5373
5374 ext4_unlock_group(sb, group);
5375 error = formatter(sb, group, start, next - start, priv);
5376 if (error)
5377 goto out_unload;
5378 ext4_lock_group(sb, group);
5379
5380 start = next + 1;
5381 }
5382
5383 ext4_unlock_group(sb, group);
5384 out_unload:
5385 ext4_mb_unload_buddy(&e4b);
5386
5387 return error;
5388 }
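
/*
 * [Editor's sketch] A hypothetical ext4_mballoc_query_range_fn callback:
 * it is invoked once per maximal free extent with the group lock dropped,
 * so it may sleep; returning non-zero aborts the walk and is propagated
 * to the caller of ext4_mballoc_query_range().
 */
static int example_count_free_extents(struct super_block *sb,
				      ext4_group_t group, ext4_grpblk_t start,
				      ext4_grpblk_t len, void *priv)
{
	unsigned long *nr = priv;

	(*nr)++;	/* one maximal free run of "len" clusters */
	return 0;
}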