This source file includes the following definitions:
- sector_to_bucket
- bucket_to_sector
- bucket_remainder
- PTR_CACHE
- PTR_BUCKET_NR
- PTR_BUCKET
- gen_after
- ptr_stale
- ptr_available
- cached_dev_put
- cached_dev_get
- bucket_gc_gen
- wake_up_allocators
- closure_bio_submit
- wait_for_kthread_stop
2 #ifndef _BCACHE_H
3 #define _BCACHE_H
4
179 #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
180
181 #include <linux/bcache.h>
182 #include <linux/bio.h>
183 #include <linux/kobject.h>
184 #include <linux/list.h>
185 #include <linux/mutex.h>
186 #include <linux/rbtree.h>
187 #include <linux/rwsem.h>
188 #include <linux/refcount.h>
189 #include <linux/types.h>
190 #include <linux/workqueue.h>
191 #include <linux/kthread.h>
192
193 #include "bset.h"
194 #include "util.h"
195 #include "closure.h"
196
197 struct bucket {
198 atomic_t pin;
199 uint16_t prio;
200 uint8_t gen;
201 uint8_t last_gc;
202 uint16_t gc_mark;
203 };
204
210 BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
211 #define GC_MARK_RECLAIMABLE 1
212 #define GC_MARK_DIRTY 2
213 #define GC_MARK_METADATA 3
214 #define GC_SECTORS_USED_SIZE 13
215 #define MAX_GC_SECTORS_USED (~(~0ULL << GC_SECTORS_USED_SIZE))
216 BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
217 BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
218
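The BITMASK() lines above pack three garbage-collection fields into the 16-bit gc_mark word of struct bucket: GC_MARK in bits 0-1, GC_SECTORS_USED in bits 2-14 (13 bits, hence MAX_GC_SECTORS_USED == 8191), and GC_MOVE in bit 15. A minimal userspace sketch of that packing, written with plain shifts and masks instead of the kernel's BITMASK() accessors; the DEMO_* names and pack_gc_mark() helper below are illustrative, not part of bcache:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_GC_MARK_BITS          2    /* bits 0..1  of gc_mark */
#define DEMO_GC_SECTORS_USED_BITS  13   /* bits 2..14 of gc_mark */
#define DEMO_GC_MOVE_BIT           15   /* bit 15     of gc_mark */

static uint16_t pack_gc_mark(unsigned mark, unsigned sectors_used, unsigned move)
{
        assert(mark < (1U << DEMO_GC_MARK_BITS));
        assert(sectors_used < (1U << DEMO_GC_SECTORS_USED_BITS));
        return (uint16_t)(mark |
                          (sectors_used << DEMO_GC_MARK_BITS) |
                          ((move & 1U) << DEMO_GC_MOVE_BIT));
}

int main(void)
{
        /* e.g. a dirty bucket with 4096 live sectors that is not being moved */
        uint16_t gc_mark = pack_gc_mark(2 /* GC_MARK_DIRTY */, 4096, 0);

        printf("mark=%u sectors_used=%u move=%u\n",
               (unsigned)(gc_mark & 0x3),
               (unsigned)((gc_mark >> 2) & 0x1fff),
               (unsigned)(gc_mark >> 15));
        return 0;
}
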
219 #include "journal.h"
220 #include "stats.h"
221 struct search;
222 struct btree;
223 struct keybuf;
224
225 struct keybuf_key {
226 struct rb_node node;
227 BKEY_PADDED(key);
228 void *private;
229 };
230
231 struct keybuf {
232 struct bkey last_scanned;
233 spinlock_t lock;
240 struct bkey start;
241 struct bkey end;
242
243 struct rb_root keys;
244
245 #define KEYBUF_NR 500
246 DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR);
247 };
248
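struct keybuf gathers keys between start and end into the keys rbtree, scanning forward from last_scanned; the keybuf_key entries themselves come from a fixed pool of KEYBUF_NR (500) preallocated slots set up by DECLARE_ARRAY_ALLOCATOR(), so refilling the buffer simply stops when the pool is exhausted rather than calling the memory allocator. A rough userspace analog of such a fixed pool, assuming a simple free-index stack (the real array allocator in util.h may differ in detail, and the names below are illustrative):

#include <stddef.h>
#include <stdio.h>

#define POOL_NR 500

struct pool_key { int dummy; };         /* stand-in for struct keybuf_key */

struct key_pool {
        struct pool_key slots[POOL_NR];
        size_t free[POOL_NR];           /* stack of free slot indices */
        size_t nr_free;
};

static void pool_init(struct key_pool *p)
{
        p->nr_free = POOL_NR;
        for (size_t i = 0; i < POOL_NR; i++)
                p->free[i] = i;
}

static struct pool_key *pool_alloc(struct key_pool *p)
{
        return p->nr_free ? &p->slots[p->free[--p->nr_free]] : NULL;
}

static void pool_free(struct key_pool *p, struct pool_key *k)
{
        p->free[p->nr_free++] = (size_t)(k - p->slots);
}

int main(void)
{
        static struct key_pool pool;

        pool_init(&pool);
        struct pool_key *k = pool_alloc(&pool);
        printf("allocated slot %ld, %zu slots left\n",
               (long)(k - pool.slots), pool.nr_free);
        pool_free(&pool, k);
        return 0;
}
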
249 struct bcache_device {
250 struct closure cl;
251
252 struct kobject kobj;
253
254 struct cache_set *c;
255 unsigned int id;
256 #define BCACHEDEVNAME_SIZE 12
257 char name[BCACHEDEVNAME_SIZE];
258
259 struct gendisk *disk;
260
261 unsigned long flags;
262 #define BCACHE_DEV_CLOSING 0
263 #define BCACHE_DEV_DETACHING 1
264 #define BCACHE_DEV_UNLINK_DONE 2
265 #define BCACHE_DEV_WB_RUNNING 3
266 #define BCACHE_DEV_RATE_DW_RUNNING 4
267 unsigned int nr_stripes;
268 unsigned int stripe_size;
269 atomic_t *stripe_sectors_dirty;
270 unsigned long *full_dirty_stripes;
271
272 struct bio_set bio_split;
273
274 unsigned int data_csum:1;
275
276 int (*cache_miss)(struct btree *b, struct search *s,
277 struct bio *bio, unsigned int sectors);
278 int (*ioctl)(struct bcache_device *d, fmode_t mode,
279 unsigned int cmd, unsigned long arg);
280 };
281
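bcache_device above tracks dirty data per stripe of stripe_size sectors: stripe_sectors_dirty counts dirty sectors in each stripe and full_dirty_stripes is a one-bit-per-stripe bitmap that lets writeback find completely dirty stripes cheaply. A worked sketch of the index arithmetic, assuming an illustrative stripe_size of 2048 sectors (1 MiB); offset_to_stripe() below is a stand-in for the stripe lookup done in the writeback code, not a copy of it:

#include <stdint.h>
#include <stdio.h>

#define STRIPE_SIZE_SECTORS 2048ULL     /* assumed example value (1 MiB stripes) */

/* which stripe does this sector fall into? */
static uint64_t offset_to_stripe(uint64_t sector)
{
        return sector / STRIPE_SIZE_SECTORS;
}

int main(void)
{
        uint64_t sector = 10000;
        uint64_t stripe = offset_to_stripe(sector);

        /* sector 10000 lands in stripe 4, which covers sectors 8192..10239 */
        printf("sector %llu -> stripe %llu (sectors %llu..%llu)\n",
               (unsigned long long)sector,
               (unsigned long long)stripe,
               (unsigned long long)(stripe * STRIPE_SIZE_SECTORS),
               (unsigned long long)((stripe + 1) * STRIPE_SIZE_SECTORS - 1));
        return 0;
}
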
282 struct io {
283
284 struct hlist_node hash;
285 struct list_head lru;
286
287 unsigned long jiffies;
288 unsigned int sequential;
289 sector_t last;
290 };
291
292 enum stop_on_failure {
293 BCH_CACHED_DEV_STOP_AUTO = 0,
294 BCH_CACHED_DEV_STOP_ALWAYS,
295 BCH_CACHED_DEV_STOP_MODE_MAX,
296 };
297
298 struct cached_dev {
299 struct list_head list;
300 struct bcache_device disk;
301 struct block_device *bdev;
302
303 struct cache_sb sb;
304 struct bio sb_bio;
305 struct bio_vec sb_bv[1];
306 struct closure sb_write;
307 struct semaphore sb_write_mutex;
308
309
310 refcount_t count;
311 struct work_struct detach;
317 atomic_t running;
323 struct rw_semaphore writeback_lock;
330 atomic_t has_dirty;
331
332 #define BCH_CACHE_READA_ALL 0
333 #define BCH_CACHE_READA_META_ONLY 1
334 unsigned int cache_readahead_policy;
335 struct bch_ratelimit writeback_rate;
336 struct delayed_work writeback_rate_update;
337
338
339 struct semaphore in_flight;
340 struct task_struct *writeback_thread;
341 struct workqueue_struct *writeback_write_wq;
342
343 struct keybuf writeback_keys;
344
345 struct task_struct *status_update_thread;
351 struct closure_waitlist writeback_ordering_wait;
352 atomic_t writeback_sequence_next;
353
354
355 #define RECENT_IO_BITS 7
356 #define RECENT_IO (1 << RECENT_IO_BITS)
357 struct io io[RECENT_IO];
358 struct hlist_head io_hash[RECENT_IO + 1];
359 struct list_head io_lru;
360 spinlock_t io_lock;
361
362 struct cache_accounting accounting;
363
364
365 unsigned int sequential_cutoff;
366 unsigned int readahead;
367
368 unsigned int io_disable:1;
369 unsigned int verify:1;
370 unsigned int bypass_torture_test:1;
371
372 unsigned int partial_stripes_expensive:1;
373 unsigned int writeback_metadata:1;
374 unsigned int writeback_running:1;
375 unsigned char writeback_percent;
376 unsigned int writeback_delay;
377
378 uint64_t writeback_rate_target;
379 int64_t writeback_rate_proportional;
380 int64_t writeback_rate_integral;
381 int64_t writeback_rate_integral_scaled;
382 int32_t writeback_rate_change;
383
384 unsigned int writeback_rate_update_seconds;
385 unsigned int writeback_rate_i_term_inverse;
386 unsigned int writeback_rate_p_term_inverse;
387 unsigned int writeback_rate_minimum;
388
389 enum stop_on_failure stop_when_cache_set_failed;
390 #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
391 atomic_t io_errors;
392 unsigned int error_limit;
393 unsigned int offline_seconds;
394
395 char backing_dev_name[BDEVNAME_SIZE];
396 };
397
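The writeback_rate_* fields in struct cached_dev parameterize a proportional-integral controller: the writeback rate is adjusted so the amount of dirty data converges on writeback_rate_target, with writeback_rate_p_term_inverse and writeback_rate_i_term_inverse acting as inverse gains and writeback_rate_update_seconds as the update period. A minimal sketch of a PI step of that general shape, as an illustration of the idea rather than the exact computation done in writeback.c (the struct, gains and target below are assumptions):

#include <stdint.h>
#include <stdio.h>

struct pi_state {
        int64_t integral;               /* accumulated error */
        uint64_t target;                /* desired amount of dirty sectors */
        unsigned p_term_inverse;        /* larger => weaker proportional response */
        unsigned i_term_inverse;        /* larger => weaker integral response */
};

/* one controller step: returns a rate adjustment in sectors/sec */
static int64_t pi_update(struct pi_state *s, uint64_t dirty)
{
        int64_t error = (int64_t)dirty - (int64_t)s->target;
        int64_t proportional = error / (int64_t)s->p_term_inverse;

        s->integral += error;
        return proportional + s->integral / (int64_t)s->i_term_inverse;
}

int main(void)
{
        struct pi_state s = {
                .target = 1 << 20,              /* assumed: 1M dirty sectors */
                .p_term_inverse = 40,           /* illustrative gains only */
                .i_term_inverse = 10000,
        };

        /* 25% over target: the controller asks for a faster writeback rate */
        printf("adjustment: %lld sectors/sec\n",
               (long long)pi_update(&s, (1 << 20) + (1 << 18)));
        return 0;
}
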
398 enum alloc_reserve {
399 RESERVE_BTREE,
400 RESERVE_PRIO,
401 RESERVE_MOVINGGC,
402 RESERVE_NONE,
403 RESERVE_NR,
404 };
405
406 struct cache {
407 struct cache_set *set;
408 struct cache_sb sb;
409 struct bio sb_bio;
410 struct bio_vec sb_bv[1];
411
412 struct kobject kobj;
413 struct block_device *bdev;
414
415 struct task_struct *alloc_thread;
416
417 struct closure prio;
418 struct prio_set *disk_buckets;
427 uint64_t *prio_buckets;
428 uint64_t *prio_last_buckets;
439 DECLARE_FIFO(long, free)[RESERVE_NR];
440 DECLARE_FIFO(long, free_inc);
441
442 size_t fifo_last_bucket;
443
444
445 struct bucket *buckets;
446
447 DECLARE_HEAP(struct bucket *, heap);
454 unsigned int invalidate_needs_gc;
455
456 bool discard;
457
458 struct journal_device journal;
459
460
461 #define IO_ERROR_SHIFT 20
462 atomic_t io_errors;
463 atomic_t io_count;
464
465 atomic_long_t meta_sectors_written;
466 atomic_long_t btree_sectors_written;
467 atomic_long_t sectors_written;
468
469 char cache_dev_name[BDEVNAME_SIZE];
470 };
471
472 struct gc_stat {
473 size_t nodes;
474 size_t nodes_pre;
475 size_t key_bytes;
476
477 size_t nkeys;
478 uint64_t data;
479 unsigned int in_use;
480 };
500 #define CACHE_SET_UNREGISTERING 0
501 #define CACHE_SET_STOPPING 1
502 #define CACHE_SET_RUNNING 2
503 #define CACHE_SET_IO_DISABLE 3
504
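CACHE_SET_UNREGISTERING through CACHE_SET_IO_DISABLE are bit numbers, not masks: they index into the flags word of struct cache_set below and are used with test_bit()/set_bit()-style operations (closure_bio_submit() further down tests CACHE_SET_IO_DISABLE this way). A small userspace illustration of the bit-number convention, using plain shifts in place of the kernel bitops:

#include <stdbool.h>
#include <stdio.h>

#define CACHE_SET_UNREGISTERING 0
#define CACHE_SET_STOPPING      1
#define CACHE_SET_RUNNING       2
#define CACHE_SET_IO_DISABLE    3

static bool flag_test(unsigned long flags, unsigned bit)
{
        return flags & (1UL << bit);
}

int main(void)
{
        unsigned long flags = 0;

        flags |= 1UL << CACHE_SET_RUNNING;      /* analogous to set_bit() */

        printf("running=%d io_disabled=%d\n",
               flag_test(flags, CACHE_SET_RUNNING),
               flag_test(flags, CACHE_SET_IO_DISABLE));
        return 0;
}
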
505 struct cache_set {
506 struct closure cl;
507
508 struct list_head list;
509 struct kobject kobj;
510 struct kobject internal;
511 struct dentry *debug;
512 struct cache_accounting accounting;
513
514 unsigned long flags;
515 atomic_t idle_counter;
516 atomic_t at_max_writeback_rate;
517
518 struct cache_sb sb;
519
520 struct cache *cache[MAX_CACHES_PER_SET];
521 struct cache *cache_by_alloc[MAX_CACHES_PER_SET];
522 int caches_loaded;
523
524 struct bcache_device **devices;
525 unsigned int devices_max_used;
526 atomic_t attached_dev_nr;
527 struct list_head cached_devs;
528 uint64_t cached_dev_sectors;
529 atomic_long_t flash_dev_dirty_sectors;
530 struct closure caching;
531
532 struct closure sb_write;
533 struct semaphore sb_write_mutex;
534
535 mempool_t search;
536 mempool_t bio_meta;
537 struct bio_set bio_split;
538
539
540 struct shrinker shrink;
541
542
543 struct mutex bucket_lock;
544
545
546 unsigned short bucket_bits;
547
548
549 unsigned short block_bits;
555 unsigned int btree_pages;
573 struct list_head btree_cache;
574 struct list_head btree_cache_freeable;
575 struct list_head btree_cache_freed;
576
577
578 unsigned int btree_cache_used;
586 wait_queue_head_t btree_cache_wait;
587 struct task_struct *btree_cache_alloc_lock;
599 atomic_t prio_blocked;
600 wait_queue_head_t bucket_wait;
606 atomic_t rescale;
610 atomic_t search_inflight;
617 uint16_t min_prio;
623 uint8_t need_gc;
624 struct gc_stat gc_stats;
625 size_t nbuckets;
626 size_t avail_nbuckets;
627
628 struct task_struct *gc_thread;
629
630 struct bkey gc_done;
642 #define BCH_ENABLE_AUTO_GC 1
643 #define BCH_DO_AUTO_GC 2
644 uint8_t gc_after_writeback;
650 int gc_mark_valid;
651
652
653 atomic_t sectors_to_gc;
654 wait_queue_head_t gc_wait;
655
656 struct keybuf moving_gc_keys;
657
658 struct semaphore moving_in_flight;
659
660 struct workqueue_struct *moving_gc_wq;
661
662 struct btree *root;
663
664 #ifdef CONFIG_BCACHE_DEBUG
665 struct btree *verify_data;
666 struct bset *verify_ondisk;
667 struct mutex verify_lock;
668 #endif
669
670 unsigned int nr_uuids;
671 struct uuid_entry *uuids;
672 BKEY_PADDED(uuid_bucket);
673 struct closure uuid_write;
674 struct semaphore uuid_write_mutex;
684 mempool_t fill_iter;
685
686 struct bset_sort_state sort;
687
688
689 struct list_head data_buckets;
690 spinlock_t data_bucket_lock;
691
692 struct journal journal;
693
694 #define CONGESTED_MAX 1024
695 unsigned int congested_last_us;
696 atomic_t congested;
697
698
699 unsigned int congested_read_threshold_us;
700 unsigned int congested_write_threshold_us;
701
702 struct time_stats btree_gc_time;
703 struct time_stats btree_split_time;
704 struct time_stats btree_read_time;
705
706 atomic_long_t cache_read_races;
707 atomic_long_t writeback_keys_done;
708 atomic_long_t writeback_keys_failed;
709
710 atomic_long_t reclaim;
711 atomic_long_t reclaimed_journal_buckets;
712 atomic_long_t flush_write;
713
714 enum {
715 ON_ERROR_UNREGISTER,
716 ON_ERROR_PANIC,
717 } on_error;
718 #define DEFAULT_IO_ERROR_LIMIT 8
719 unsigned int error_limit;
720 unsigned int error_decay;
721
722 unsigned short journal_delay_ms;
723 bool expensive_debug_checks;
724 unsigned int verify:1;
725 unsigned int key_merging_disabled:1;
726 unsigned int gc_always_rewrite:1;
727 unsigned int shrinker_disabled:1;
728 unsigned int copy_gc_enabled:1;
729
730 #define BUCKET_HASH_BITS 12
731 struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS];
732 };
733
734 struct bbio {
735 unsigned int submit_time_us;
736 union {
737 struct bkey key;
738 uint64_t _pad[3];
743 };
744 struct bio bio;
745 };
746
747 #define BTREE_PRIO USHRT_MAX
748 #define INITIAL_PRIO 32768U
749
750 #define btree_bytes(c) ((c)->btree_pages * PAGE_SIZE)
751 #define btree_blocks(b) \
752 ((unsigned int) (KEY_SIZE(&b->key) >> (b)->c->block_bits))
753
754 #define btree_default_blocks(c) \
755 ((unsigned int) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))
756
757 #define bucket_pages(c) ((c)->sb.bucket_size / PAGE_SECTORS)
758 #define bucket_bytes(c) ((c)->sb.bucket_size << 9)
759 #define block_bytes(c) ((c)->sb.block_size << 9)
760
761 #define prios_per_bucket(c) \
762 ((bucket_bytes(c) - sizeof(struct prio_set)) / \
763 sizeof(struct bucket_disk))
764 #define prio_buckets(c) \
765 DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c))
766
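prios_per_bucket() and prio_buckets() size the on-disk priority/gen area: each bucket reserved for priorities holds one struct prio_set header followed by as many struct bucket_disk entries as fit, and rounding nbuckets up by that count gives how many such buckets are needed. A hedged worked example of the layout arithmetic; the header and entry sizes below are placeholders, not the real struct sizes:

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

int main(void)
{
        /* all of these values are illustrative assumptions */
        uint64_t bucket_bytes      = 512 * 1024;        /* 512 KiB bucket */
        uint64_t prio_set_header   = 40;                /* assumed header size */
        uint64_t bucket_disk_bytes = 3;                 /* assumed entry size */
        uint64_t nbuckets          = 1000000;

        uint64_t prios_per_bucket = (bucket_bytes - prio_set_header) / bucket_disk_bytes;
        uint64_t prio_buckets     = DIV_ROUND_UP(nbuckets, prios_per_bucket);

        printf("%llu prio entries per bucket, %llu prio buckets for %llu data buckets\n",
               (unsigned long long)prios_per_bucket,
               (unsigned long long)prio_buckets,
               (unsigned long long)nbuckets);
        return 0;
}
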
767 static inline size_t sector_to_bucket(struct cache_set *c, sector_t s)
768 {
769 return s >> c->bucket_bits;
770 }
771
772 static inline sector_t bucket_to_sector(struct cache_set *c, size_t b)
773 {
774 return ((sector_t) b) << c->bucket_bits;
775 }
776
777 static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
778 {
779 return s & (c->sb.bucket_size - 1);
780 }
781
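sector_to_bucket(), bucket_to_sector() and bucket_remainder() are plain shift-and-mask arithmetic: bucket_bits is log2 of the bucket size in sectors, so a sector number splits into a bucket index (high bits) and an offset inside the bucket (low bits). A worked example assuming 1024-sector (512 KiB) buckets, i.e. bucket_bits == 10; the real values come from struct cache_set and the superblock:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        unsigned bucket_bits = 10;                      /* assumed: 1024-sector buckets */
        uint64_t bucket_size = 1ULL << bucket_bits;

        uint64_t sector    = 5000;
        uint64_t bucket    = sector >> bucket_bits;             /* sector_to_bucket() */
        uint64_t remainder = sector & (bucket_size - 1);        /* bucket_remainder()  */
        uint64_t start     = bucket << bucket_bits;             /* bucket_to_sector()  */

        /* sector 5000 -> bucket 4, offset 904; bucket 4 starts at sector 4096 */
        printf("sector %llu -> bucket %llu + %llu (bucket starts at sector %llu)\n",
               (unsigned long long)sector, (unsigned long long)bucket,
               (unsigned long long)remainder, (unsigned long long)start);
        return 0;
}

Note that the mask in bucket_remainder() only works because the bucket size is a power of two, which is also what using bucket_bits as a plain shift count implies.
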
782 static inline struct cache *PTR_CACHE(struct cache_set *c,
783 const struct bkey *k,
784 unsigned int ptr)
785 {
786 return c->cache[PTR_DEV(k, ptr)];
787 }
788
789 static inline size_t PTR_BUCKET_NR(struct cache_set *c,
790 const struct bkey *k,
791 unsigned int ptr)
792 {
793 return sector_to_bucket(c, PTR_OFFSET(k, ptr));
794 }
795
796 static inline struct bucket *PTR_BUCKET(struct cache_set *c,
797 const struct bkey *k,
798 unsigned int ptr)
799 {
800 return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
801 }
802
803 static inline uint8_t gen_after(uint8_t a, uint8_t b)
804 {
805 uint8_t r = a - b;
806
807 return r > 128U ? 0 : r;
808 }
809
810 static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
811 unsigned int i)
812 {
813 return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
814 }
815
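gen_after() compares two 8-bit generation counters modulo 256: it returns how far a is ahead of b, and treats a difference above 128 as "not ahead" so wraparound never produces a huge bogus result. ptr_stale() uses it to ask how many times a bucket has been reused since the key's pointer was written; anything non-zero means the pointer no longer refers to live data (bucket_gc_gen() and BUCKET_GC_GEN_MAX further down bound how far a bucket's gen may run ahead of its last GC'd value, keeping these 8-bit comparisons unambiguous). A standalone check of the arithmetic:

#include <stdint.h>
#include <stdio.h>

static uint8_t gen_after(uint8_t a, uint8_t b)
{
        uint8_t r = a - b;      /* wraps modulo 256 */

        return r > 128U ? 0 : r;
}

int main(void)
{
        /* bucket reused twice since the pointer was written: stale by 2 */
        printf("gen_after(3, 1)   = %u\n", gen_after(3, 1));

        /* same idea, but the 8-bit counter wrapped past 255: stale by 3 */
        printf("gen_after(1, 254) = %u\n", gen_after(1, 254));

        /* pointer gen ahead of bucket gen: treated as not stale */
        printf("gen_after(10, 12) = %u\n", gen_after(10, 12));
        return 0;
}
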
816 static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
817 unsigned int i)
818 {
819 return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
820 }
828 #define csum_set(i) \
829 bch_crc64(((void *) (i)) + sizeof(uint64_t), \
830 ((void *) bset_bkey_last(i)) - \
831 (((void *) (i)) + sizeof(uint64_t)))
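csum_set() checksums a set from just past its leading 64-bit word (which holds the stored checksum) up to the end of its last key, as returned by bset_bkey_last(), so the checksum itself is excluded from the summed bytes. A sketch of that "skip the first u64" layout in plain C; demo_csum() is a trivial stand-in for bch_crc64() from util.h, for illustration only:

#include <stdint.h>
#include <stdio.h>

struct demo_set {
        uint64_t csum;          /* excluded from the checksum */
        uint64_t payload[4];    /* stands in for the rest of the set */
};

/* trivial stand-in for bch_crc64() */
static uint64_t demo_csum(const void *base, size_t len)
{
        const uint8_t *p = base;
        uint64_t acc = 0;

        for (size_t i = 0; i < len; i++)
                acc = (acc << 1) ^ p[i];
        return acc;
}

int main(void)
{
        struct demo_set s = { .payload = { 1, 2, 3, 4 } };
        const void *start = (const uint8_t *)&s + sizeof(uint64_t);
        const void *end   = &s.payload[4];      /* analogous to bset_bkey_last() */

        s.csum = demo_csum(start, (size_t)((const uint8_t *)end - (const uint8_t *)start));
        printf("checksum over %zu bytes: %llu\n",
               (size_t)((const uint8_t *)end - (const uint8_t *)start),
               (unsigned long long)s.csum);
        return 0;
}
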
835 #define btree_bug(b, ...) \
836 do { \
837 if (bch_cache_set_error((b)->c, __VA_ARGS__)) \
838 dump_stack(); \
839 } while (0)
840
841 #define cache_bug(c, ...) \
842 do { \
843 if (bch_cache_set_error(c, __VA_ARGS__)) \
844 dump_stack(); \
845 } while (0)
846
847 #define btree_bug_on(cond, b, ...) \
848 do { \
849 if (cond) \
850 btree_bug(b, __VA_ARGS__); \
851 } while (0)
852
853 #define cache_bug_on(cond, c, ...) \
854 do { \
855 if (cond) \
856 cache_bug(c, __VA_ARGS__); \
857 } while (0)
858
859 #define cache_set_err_on(cond, c, ...) \
860 do { \
861 if (cond) \
862 bch_cache_set_error(c, __VA_ARGS__); \
863 } while (0)
867 #define for_each_cache(ca, cs, iter) \
868 for (iter = 0; ca = cs->cache[iter], iter < (cs)->sb.nr_in_set; iter++)
869
870 #define for_each_bucket(b, ca) \
871 for (b = (ca)->buckets + (ca)->sb.first_bucket; \
872 b < (ca)->buckets + (ca)->sb.nbuckets; b++)
873
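for_each_cache() hides a comma-operator trick in its loop condition: ca = cs->cache[iter] is evaluated first and the actual test is iter < (cs)->sb.nr_in_set, so the slot is loaded one extra time on the terminating check. A standalone illustration of the same idiom over a plain array (the names here are illustrative, not the kernel types):

#include <stddef.h>
#include <stdio.h>

/* same shape as for_each_cache(): assign first, then test, via the comma operator */
#define for_each_item(it, arr, n, i) \
        for (i = 0; it = (arr)[i], i < (n); i++)

int main(void)
{
        /* one spare slot so the extra load when i == n stays in bounds in this demo;
         * the kernel macro similarly reads cs->cache[iter] once more on the final test */
        const char *names[4] = { "cache0", "cache1", "cache2", NULL };
        const char *it;
        size_t i;

        for_each_item(it, names, 3, i)
                printf("slot %zu: %s\n", i, it);
        return 0;
}
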
874 static inline void cached_dev_put(struct cached_dev *dc)
875 {
876 if (refcount_dec_and_test(&dc->count))
877 schedule_work(&dc->detach);
878 }
879
880 static inline bool cached_dev_get(struct cached_dev *dc)
881 {
882 if (!refcount_inc_not_zero(&dc->count))
883 return false;
886 smp_mb__after_atomic();
887 return true;
888 }
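cached_dev_get()/cached_dev_put() follow the usual "take a reference only while the object is still live" pattern: refcount_inc_not_zero() fails once the count has hit zero, i.e. once the final cached_dev_put() has already scheduled the detach work, so a dying cached_dev is never resurrected. A userspace analog of the inc-not-zero rule using C11 atomics, illustrating the semantics rather than the kernel's refcount_t implementation:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool ref_get_not_zero(atomic_int *count)
{
        int old = atomic_load(count);

        /* only bump the count if it is still non-zero */
        while (old != 0)
                if (atomic_compare_exchange_weak(count, &old, old + 1))
                        return true;
        return false;
}

static bool ref_put(atomic_int *count)
{
        /* returns true when the caller dropped the last reference */
        return atomic_fetch_sub(count, 1) == 1;
}

int main(void)
{
        atomic_int count = 1;           /* initial reference, as after registration */

        printf("get while live:  %s\n", ref_get_not_zero(&count) ? "ok" : "refused");
        ref_put(&count);                /* drop the reference we just took */

        if (ref_put(&count))            /* drop the initial reference: object is now dead */
                printf("last put: teardown starts here (bcache schedules the detach work)\n");

        printf("get after death: %s\n", ref_get_not_zero(&count) ? "ok" : "refused");
        return 0;
}
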
895 static inline uint8_t bucket_gc_gen(struct bucket *b)
896 {
897 return b->gen - b->last_gc;
898 }
899
900 #define BUCKET_GC_GEN_MAX 96U
901
902 #define kobj_attribute_write(n, fn) \
903 static struct kobj_attribute ksysfs_##n = __ATTR(n, 0200, NULL, fn)
904
905 #define kobj_attribute_rw(n, show, store) \
906 static struct kobj_attribute ksysfs_##n = \
907 __ATTR(n, 0600, show, store)
908
909 static inline void wake_up_allocators(struct cache_set *c)
910 {
911 struct cache *ca;
912 unsigned int i;
913
914 for_each_cache(ca, c, i)
915 wake_up_process(ca->alloc_thread);
916 }
917
918 static inline void closure_bio_submit(struct cache_set *c,
919 struct bio *bio,
920 struct closure *cl)
921 {
922 closure_get(cl);
923 if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
924 bio->bi_status = BLK_STS_IOERR;
925 bio_endio(bio);
926 return;
927 }
928 generic_make_request(bio);
929 }
937 static inline void wait_for_kthread_stop(void)
938 {
939 while (!kthread_should_stop()) {
940 set_current_state(TASK_INTERRUPTIBLE);
941 schedule();
942 }
943 }
947 void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
948 void bch_count_io_errors(struct cache *ca, blk_status_t error,
949 int is_read, const char *m);
950 void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
951 blk_status_t error, const char *m);
952 void bch_bbio_endio(struct cache_set *c, struct bio *bio,
953 blk_status_t error, const char *m);
954 void bch_bbio_free(struct bio *bio, struct cache_set *c);
955 struct bio *bch_bbio_alloc(struct cache_set *c);
956
957 void __bch_submit_bbio(struct bio *bio, struct cache_set *c);
958 void bch_submit_bbio(struct bio *bio, struct cache_set *c,
959 struct bkey *k, unsigned int ptr);
960
961 uint8_t bch_inc_gen(struct cache *ca, struct bucket *b);
962 void bch_rescale_priorities(struct cache_set *c, int sectors);
963
964 bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b);
965 void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b);
966
967 void __bch_bucket_free(struct cache *ca, struct bucket *b);
968 void bch_bucket_free(struct cache_set *c, struct bkey *k);
969
970 long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait);
971 int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
972 struct bkey *k, int n, bool wait);
973 int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
974 struct bkey *k, int n, bool wait);
975 bool bch_alloc_sectors(struct cache_set *c, struct bkey *k,
976 unsigned int sectors, unsigned int write_point,
977 unsigned int write_prio, bool wait);
978 bool bch_cached_dev_error(struct cached_dev *dc);
979
980 __printf(2, 3)
981 bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);
982
983 int bch_prio_write(struct cache *ca, bool wait);
984 void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
985
986 extern struct workqueue_struct *bcache_wq;
987 extern struct workqueue_struct *bch_journal_wq;
988 extern struct mutex bch_register_lock;
989 extern struct list_head bch_cache_sets;
990
991 extern struct kobj_type bch_cached_dev_ktype;
992 extern struct kobj_type bch_flash_dev_ktype;
993 extern struct kobj_type bch_cache_set_ktype;
994 extern struct kobj_type bch_cache_set_internal_ktype;
995 extern struct kobj_type bch_cache_ktype;
996
997 void bch_cached_dev_release(struct kobject *kobj);
998 void bch_flash_dev_release(struct kobject *kobj);
999 void bch_cache_set_release(struct kobject *kobj);
1000 void bch_cache_release(struct kobject *kobj);
1001
1002 int bch_uuid_write(struct cache_set *c);
1003 void bcache_write_super(struct cache_set *c);
1004
1005 int bch_flash_dev_create(struct cache_set *c, uint64_t size);
1006
1007 int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
1008 uint8_t *set_uuid);
1009 void bch_cached_dev_detach(struct cached_dev *dc);
1010 int bch_cached_dev_run(struct cached_dev *dc);
1011 void bcache_device_stop(struct bcache_device *d);
1012
1013 void bch_cache_set_unregister(struct cache_set *c);
1014 void bch_cache_set_stop(struct cache_set *c);
1015
1016 struct cache_set *bch_cache_set_alloc(struct cache_sb *sb);
1017 void bch_btree_cache_free(struct cache_set *c);
1018 int bch_btree_cache_alloc(struct cache_set *c);
1019 void bch_moving_init_cache_set(struct cache_set *c);
1020 int bch_open_buckets_alloc(struct cache_set *c);
1021 void bch_open_buckets_free(struct cache_set *c);
1022
1023 int bch_cache_allocator_start(struct cache *ca);
1024
1025 void bch_debug_exit(void);
1026 void bch_debug_init(void);
1027 void bch_request_exit(void);
1028 int bch_request_init(void);
1029
1030 #endif