This source file includes the following definitions:
- kmem_cache_debug
- fixup_red_left
- kmem_cache_has_cpu_partial
- sysfs_slab_add
- sysfs_slab_alias
- memcg_propagate_slab_attrs
- sysfs_slab_remove
- freelist_ptr
- freelist_dereference
- get_freepointer
- prefetch_freepointer
- get_freepointer_safe
- set_freepointer
- slab_index
- order_objects
- oo_make
- oo_order
- oo_objects
- slab_lock
- slab_unlock
- __cmpxchg_double_slab
- cmpxchg_double_slab
- get_map
- size_from_object
- restore_red_left
- metadata_access_enable
- metadata_access_disable
- check_valid_pointer
- print_section
- get_track
- set_track
- init_tracking
- print_track
- print_tracking
- print_page_info
- slab_bug
- slab_fix
- print_trailer
- object_err
- slab_err
- init_object
- restore_bytes
- check_bytes_and_report
- check_pad_bytes
- slab_pad_check
- check_object
- check_slab
- on_freelist
- trace
- add_full
- remove_full
- slabs_node
- node_nr_slabs
- inc_slabs_node
- dec_slabs_node
- setup_object_debug
- setup_page_debug
- alloc_consistency_checks
- alloc_debug_processing
- free_consistency_checks
- free_debug_processing
- setup_slub_debug
- kmem_cache_flags
- setup_object_debug
- setup_page_debug
- alloc_debug_processing
- free_debug_processing
- slab_pad_check
- check_object
- add_full
- remove_full
- kmem_cache_flags
- slabs_node
- node_nr_slabs
- inc_slabs_node
- dec_slabs_node
- kmalloc_large_node_hook
- kfree_hook
- slab_free_hook
- slab_free_freelist_hook
- setup_object
- alloc_slab_page
- init_cache_random_seq
- init_freelist_randomization
- next_freelist_entry
- shuffle_freelist
- init_cache_random_seq
- init_freelist_randomization
- shuffle_freelist
- allocate_slab
- new_slab
- __free_slab
- rcu_free_slab
- free_slab
- discard_slab
- __add_partial
- add_partial
- remove_partial
- acquire_slab
- get_partial_node
- get_any_partial
- get_partial
- next_tid
- tid_to_cpu
- tid_to_event
- init_tid
- note_cmpxchg_failure
- init_kmem_cache_cpus
- deactivate_slab
- unfreeze_partials
- put_cpu_partial
- flush_slab
- __flush_cpu_slab
- flush_cpu_slab
- has_cpu_slab
- flush_all
- slub_cpu_dead
- node_match
- count_free
- node_nr_objs
- count_partial
- slab_out_of_memory
- new_slab_objects
- pfmemalloc_match
- get_freelist
- ___slab_alloc
- __slab_alloc
- maybe_wipe_obj_freeptr
- slab_alloc_node
- slab_alloc
- kmem_cache_alloc
- kmem_cache_alloc_trace
- kmem_cache_alloc_node
- kmem_cache_alloc_node_trace
- __slab_free
- do_slab_free
- slab_free
- ___cache_free
- kmem_cache_free
- build_detached_freelist
- kmem_cache_free_bulk
- kmem_cache_alloc_bulk
- slab_order
- calculate_order
- init_kmem_cache_node
- alloc_kmem_cache_cpus
- early_kmem_cache_node_alloc
- free_kmem_cache_nodes
- __kmem_cache_release
- init_kmem_cache_nodes
- set_min_partial
- set_cpu_partial
- calculate_sizes
- kmem_cache_open
- list_slab_objects
- free_partial
- __kmem_cache_empty
- __kmem_cache_shutdown
- setup_slub_min_order
- setup_slub_max_order
- setup_slub_min_objects
- __kmalloc
- kmalloc_large_node
- __kmalloc_node
- __check_heap_object
- __ksize
- kfree
- __kmem_cache_shrink
- __kmemcg_cache_deactivate_after_rcu
- __kmemcg_cache_deactivate
- slab_mem_going_offline_callback
- slab_mem_offline_callback
- slab_mem_going_online_callback
- slab_memory_callback
- bootstrap
- kmem_cache_init
- kmem_cache_init_late
- __kmem_cache_alias
- __kmem_cache_create
- __kmalloc_track_caller
- __kmalloc_node_track_caller
- count_inuse
- count_total
- validate_slab
- validate_slab_slab
- validate_slab_node
- validate_slab_cache
- free_loc_track
- alloc_loc_track
- add_location
- process_slab
- list_locations
- resiliency_test
- resiliency_test
- setup_slub_memcg_sysfs
- show_slab_objects
- any_slab_objects
- slab_size_show
- align_show
- object_size_show
- objs_per_slab_show
- order_store
- order_show
- min_partial_show
- min_partial_store
- cpu_partial_show
- cpu_partial_store
- ctor_show
- aliases_show
- partial_show
- cpu_slabs_show
- objects_show
- objects_partial_show
- slabs_cpu_partial_show
- reclaim_account_show
- reclaim_account_store
- hwcache_align_show
- cache_dma_show
- usersize_show
- destroy_by_rcu_show
- slabs_show
- total_objects_show
- sanity_checks_show
- sanity_checks_store
- trace_show
- trace_store
- red_zone_show
- red_zone_store
- poison_show
- poison_store
- store_user_show
- store_user_store
- validate_show
- validate_store
- alloc_calls_show
- free_calls_show
- failslab_show
- failslab_store
- shrink_show
- shrink_store
- remote_node_defrag_ratio_show
- remote_node_defrag_ratio_store
- show_stat
- clear_stat
- slab_attr_show
- slab_attr_store
- memcg_propagate_slab_attrs
- kmem_cache_release
- uevent_filter
- cache_kset
- create_unique_id
- sysfs_slab_remove_workfn
- sysfs_slab_add
- sysfs_slab_remove
- sysfs_slab_unlink
- sysfs_slab_release
- sysfs_slab_alias
- slab_sysfs_init
- get_slabinfo
- slabinfo_show_stats
- slabinfo_write
1
2
3
4
5
6
7
8
9
10
11
12
13 #include <linux/mm.h>
14 #include <linux/swap.h>
15 #include <linux/module.h>
16 #include <linux/bit_spinlock.h>
17 #include <linux/interrupt.h>
18 #include <linux/bitops.h>
19 #include <linux/slab.h>
20 #include "slab.h"
21 #include <linux/proc_fs.h>
22 #include <linux/seq_file.h>
23 #include <linux/kasan.h>
24 #include <linux/cpu.h>
25 #include <linux/cpuset.h>
26 #include <linux/mempolicy.h>
27 #include <linux/ctype.h>
28 #include <linux/debugobjects.h>
29 #include <linux/kallsyms.h>
30 #include <linux/memory.h>
31 #include <linux/math64.h>
32 #include <linux/fault-inject.h>
33 #include <linux/stacktrace.h>
34 #include <linux/prefetch.h>
35 #include <linux/memcontrol.h>
36 #include <linux/random.h>
37
38 #include <trace/events/kmem.h>
39
40 #include "internal.h"
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
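/*
 * kmem_cache_debug() reports whether any of the SLAB_DEBUG_FLAGS are set
 * for a cache.  It is used throughout this file to gate the debug-only
 * slow paths; with CONFIG_SLUB_DEBUG disabled it compiles to 0 so the
 * debug code is optimized away.
 */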
119 static inline int kmem_cache_debug(struct kmem_cache *s)
120 {
121 #ifdef CONFIG_SLUB_DEBUG
122 return unlikely(s->flags & SLAB_DEBUG_FLAGS);
123 #else
124 return 0;
125 #endif
126 }
127
128 void *fixup_red_left(struct kmem_cache *s, void *p)
129 {
130 if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
131 p += s->red_left_pad;
132
133 return p;
134 }
135
136 static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
137 {
138 #ifdef CONFIG_SLUB_CPU_PARTIAL
139 return !kmem_cache_debug(s);
140 #else
141 return false;
142 #endif
143 }
144
145
146
147
148
149
150
151
152
153
154 #undef SLUB_RESILIENCY_TEST
155
156
157 #undef SLUB_DEBUG_CMPXCHG
158
159
160
161
162
163 #define MIN_PARTIAL 5
164
165
166
167
168
169
170 #define MAX_PARTIAL 10
171
172 #define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
173 SLAB_POISON | SLAB_STORE_USER)
174
175
176
177
178
179 #define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
180 SLAB_TRACE)
181
182
183
184
185
186
187
188 #define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
189
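/*
 * A kmem_cache_order_objects word packs the page allocation order into the
 * bits above OO_SHIFT and the objects-per-slab count into the low OO_MASK
 * bits.  For example (assuming 4 KiB pages), a cache of 256-byte objects at
 * order 1 gives order_objects(1, 256) = 32, so oo_make(1, 256) stores
 * (1 << 16) + 32; oo_order() and oo_objects() unpack the two halves.
 */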
190 #define OO_SHIFT 16
191 #define OO_MASK ((1 << OO_SHIFT) - 1)
192 #define MAX_OBJS_PER_PAGE 32767
193
194
195
196 #define __OBJECT_POISON ((slab_flags_t __force)0x80000000U)
197
198 #define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)
199
200
201
202
203 #define TRACK_ADDRS_COUNT 16
204 struct track {
205 unsigned long addr;
206 #ifdef CONFIG_STACKTRACE
207 unsigned long addrs[TRACK_ADDRS_COUNT];
208 #endif
209 int cpu;
210 int pid;
211 unsigned long when;
212 };
213
214 enum track_item { TRACK_ALLOC, TRACK_FREE };
215
216 #ifdef CONFIG_SYSFS
217 static int sysfs_slab_add(struct kmem_cache *);
218 static int sysfs_slab_alias(struct kmem_cache *, const char *);
219 static void memcg_propagate_slab_attrs(struct kmem_cache *s);
220 static void sysfs_slab_remove(struct kmem_cache *s);
221 #else
222 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
223 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
224 { return 0; }
225 static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
226 static inline void sysfs_slab_remove(struct kmem_cache *s) { }
227 #endif
228
229 static inline void stat(const struct kmem_cache *s, enum stat_item si)
230 {
231 #ifdef CONFIG_SLUB_STATS
232
233
234
235
236 raw_cpu_inc(s->cpu_slab->stat[si]);
237 #endif
238 }
239
240
241
242
243
244
245
246
247
248
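/*
 * With CONFIG_SLAB_FREELIST_HARDENED the free pointer stored inside each
 * free object is obfuscated: it is XORed with a per-cache random value and
 * with the (byte-swapped) address it is stored at, so an attacker who can
 * overwrite a freed object cannot trivially forge a freelist entry.
 * freelist_ptr() performs both the encode and decode steps, since XOR is
 * its own inverse.
 */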
249 static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
250 unsigned long ptr_addr)
251 {
252 #ifdef CONFIG_SLAB_FREELIST_HARDENED
253
254
255
256
257
258
259
260
261
262
263 return (void *)((unsigned long)ptr ^ s->random ^
264 swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
265 #else
266 return ptr;
267 #endif
268 }
269
270
271 static inline void *freelist_dereference(const struct kmem_cache *s,
272 void *ptr_addr)
273 {
274 return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
275 (unsigned long)ptr_addr);
276 }
277
278 static inline void *get_freepointer(struct kmem_cache *s, void *object)
279 {
280 return freelist_dereference(s, object + s->offset);
281 }
282
283 static void prefetch_freepointer(const struct kmem_cache *s, void *object)
284 {
285 prefetch(object + s->offset);
286 }
287
288 static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
289 {
290 unsigned long freepointer_addr;
291 void *p;
292
293 if (!debug_pagealloc_enabled_static())
294 return get_freepointer(s, object);
295
296 freepointer_addr = (unsigned long)object + s->offset;
297 probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p));
298 return freelist_ptr(s, p, freepointer_addr);
299 }
300
301 static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
302 {
303 unsigned long freeptr_addr = (unsigned long)object + s->offset;
304
305 #ifdef CONFIG_SLAB_FREELIST_HARDENED
306 BUG_ON(object == fp);
307 #endif
308
309 *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
310 }
311
312
313 #define for_each_object(__p, __s, __addr, __objects) \
314 for (__p = fixup_red_left(__s, __addr); \
315 __p < (__addr) + (__objects) * (__s)->size; \
316 __p += (__s)->size)
317
318
319 static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr)
320 {
321 return (kasan_reset_tag(p) - addr) / s->size;
322 }
323
324 static inline unsigned int order_objects(unsigned int order, unsigned int size)
325 {
326 return ((unsigned int)PAGE_SIZE << order) / size;
327 }
328
329 static inline struct kmem_cache_order_objects oo_make(unsigned int order,
330 unsigned int size)
331 {
332 struct kmem_cache_order_objects x = {
333 (order << OO_SHIFT) + order_objects(order, size)
334 };
335
336 return x;
337 }
338
339 static inline unsigned int oo_order(struct kmem_cache_order_objects x)
340 {
341 return x.x >> OO_SHIFT;
342 }
343
344 static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
345 {
346 return x.x & OO_MASK;
347 }
348
349
350
351
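/*
 * Per-slab locking uses a bit spinlock on the PG_locked bit in the slab
 * page's flags.  It is taken by the cmpxchg_double fallbacks below and by
 * the debug paths that need a stable view of the slab.
 */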
352 static __always_inline void slab_lock(struct page *page)
353 {
354 VM_BUG_ON_PAGE(PageTail(page), page);
355 bit_spin_lock(PG_locked, &page->flags);
356 }
357
358 static __always_inline void slab_unlock(struct page *page)
359 {
360 VM_BUG_ON_PAGE(PageTail(page), page);
361 __bit_spin_unlock(PG_locked, &page->flags);
362 }
363
364
365 static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
366 void *freelist_old, unsigned long counters_old,
367 void *freelist_new, unsigned long counters_new,
368 const char *n)
369 {
370 VM_BUG_ON(!irqs_disabled());
371 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
372 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
373 if (s->flags & __CMPXCHG_DOUBLE) {
374 if (cmpxchg_double(&page->freelist, &page->counters,
375 freelist_old, counters_old,
376 freelist_new, counters_new))
377 return true;
378 } else
379 #endif
380 {
381 slab_lock(page);
382 if (page->freelist == freelist_old &&
383 page->counters == counters_old) {
384 page->freelist = freelist_new;
385 page->counters = counters_new;
386 slab_unlock(page);
387 return true;
388 }
389 slab_unlock(page);
390 }
391
392 cpu_relax();
393 stat(s, CMPXCHG_DOUBLE_FAIL);
394
395 #ifdef SLUB_DEBUG_CMPXCHG
396 pr_info("%s %s: cmpxchg double redo ", n, s->name);
397 #endif
398
399 return false;
400 }
401
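/*
 * cmpxchg_double_slab() is the interrupt-safe variant of the helper above:
 * when falling back to slab_lock() it saves and restores IRQ state itself,
 * whereas __cmpxchg_double_slab() may only be called with interrupts
 * already disabled (see the VM_BUG_ON above).
 */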
402 static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
403 void *freelist_old, unsigned long counters_old,
404 void *freelist_new, unsigned long counters_new,
405 const char *n)
406 {
407 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
408 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
409 if (s->flags & __CMPXCHG_DOUBLE) {
410 if (cmpxchg_double(&page->freelist, &page->counters,
411 freelist_old, counters_old,
412 freelist_new, counters_new))
413 return true;
414 } else
415 #endif
416 {
417 unsigned long flags;
418
419 local_irq_save(flags);
420 slab_lock(page);
421 if (page->freelist == freelist_old &&
422 page->counters == counters_old) {
423 page->freelist = freelist_new;
424 page->counters = counters_new;
425 slab_unlock(page);
426 local_irq_restore(flags);
427 return true;
428 }
429 slab_unlock(page);
430 local_irq_restore(flags);
431 }
432
433 cpu_relax();
434 stat(s, CMPXCHG_DOUBLE_FAIL);
435
436 #ifdef SLUB_DEBUG_CMPXCHG
437 pr_info("%s %s: cmpxchg double redo ", n, s->name);
438 #endif
439
440 return false;
441 }
442
443 #ifdef CONFIG_SLUB_DEBUG
444
445
446
447
448
449
450 static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
451 {
452 void *p;
453 void *addr = page_address(page);
454
455 for (p = page->freelist; p; p = get_freepointer(s, p))
456 set_bit(slab_index(p, s, addr), map);
457 }
458
459 static inline unsigned int size_from_object(struct kmem_cache *s)
460 {
461 if (s->flags & SLAB_RED_ZONE)
462 return s->size - s->red_left_pad;
463
464 return s->size;
465 }
466
467 static inline void *restore_red_left(struct kmem_cache *s, void *p)
468 {
469 if (s->flags & SLAB_RED_ZONE)
470 p -= s->red_left_pad;
471
472 return p;
473 }
474
475
476
477
478 #if defined(CONFIG_SLUB_DEBUG_ON)
479 static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
480 #else
481 static slab_flags_t slub_debug;
482 #endif
483
484 static char *slub_debug_slabs;
485 static int disable_higher_order_debug;
486
487
488
489
490
491
492
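/*
 * SLUB's debug code reads and writes object metadata (redzones, poison
 * bytes, padding) that lies outside the area KASAN considers valid, so
 * KASAN checking is temporarily disabled around those accesses.
 */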
493 static inline void metadata_access_enable(void)
494 {
495 kasan_disable_current();
496 }
497
498 static inline void metadata_access_disable(void)
499 {
500 kasan_enable_current();
501 }
502
503
504
505
506
507
508 static inline int check_valid_pointer(struct kmem_cache *s,
509 struct page *page, void *object)
510 {
511 void *base;
512
513 if (!object)
514 return 1;
515
516 base = page_address(page);
517 object = kasan_reset_tag(object);
518 object = restore_red_left(s, object);
519 if (object < base || object >= base + page->objects * s->size ||
520 (object - base) % s->size) {
521 return 0;
522 }
523
524 return 1;
525 }
526
527 static void print_section(char *level, char *text, u8 *addr,
528 unsigned int length)
529 {
530 metadata_access_enable();
531 print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
532 length, 1);
533 metadata_access_disable();
534 }
535
536 static struct track *get_track(struct kmem_cache *s, void *object,
537 enum track_item alloc)
538 {
539 struct track *p;
540
541 if (s->offset)
542 p = object + s->offset + sizeof(void *);
543 else
544 p = object + s->inuse;
545
546 return p + alloc;
547 }
548
549 static void set_track(struct kmem_cache *s, void *object,
550 enum track_item alloc, unsigned long addr)
551 {
552 struct track *p = get_track(s, object, alloc);
553
554 if (addr) {
555 #ifdef CONFIG_STACKTRACE
556 unsigned int nr_entries;
557
558 metadata_access_enable();
559 nr_entries = stack_trace_save(p->addrs, TRACK_ADDRS_COUNT, 3);
560 metadata_access_disable();
561
562 if (nr_entries < TRACK_ADDRS_COUNT)
563 p->addrs[nr_entries] = 0;
564 #endif
565 p->addr = addr;
566 p->cpu = smp_processor_id();
567 p->pid = current->pid;
568 p->when = jiffies;
569 } else {
570 memset(p, 0, sizeof(struct track));
571 }
572 }
573
574 static void init_tracking(struct kmem_cache *s, void *object)
575 {
576 if (!(s->flags & SLAB_STORE_USER))
577 return;
578
579 set_track(s, object, TRACK_FREE, 0UL);
580 set_track(s, object, TRACK_ALLOC, 0UL);
581 }
582
583 static void print_track(const char *s, struct track *t, unsigned long pr_time)
584 {
585 if (!t->addr)
586 return;
587
588 pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
589 s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
590 #ifdef CONFIG_STACKTRACE
591 {
592 int i;
593 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
594 if (t->addrs[i])
595 pr_err("\t%pS\n", (void *)t->addrs[i]);
596 else
597 break;
598 }
599 #endif
600 }
601
602 static void print_tracking(struct kmem_cache *s, void *object)
603 {
604 unsigned long pr_time = jiffies;
605 if (!(s->flags & SLAB_STORE_USER))
606 return;
607
608 print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
609 print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
610 }
611
612 static void print_page_info(struct page *page)
613 {
614 pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
615 page, page->objects, page->inuse, page->freelist, page->flags);
616
617 }
618
619 static void slab_bug(struct kmem_cache *s, char *fmt, ...)
620 {
621 struct va_format vaf;
622 va_list args;
623
624 va_start(args, fmt);
625 vaf.fmt = fmt;
626 vaf.va = &args;
627 pr_err("=============================================================================\n");
628 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
629 pr_err("-----------------------------------------------------------------------------\n\n");
630
631 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
632 va_end(args);
633 }
634
635 static void slab_fix(struct kmem_cache *s, char *fmt, ...)
636 {
637 struct va_format vaf;
638 va_list args;
639
640 va_start(args, fmt);
641 vaf.fmt = fmt;
642 vaf.va = &args;
643 pr_err("FIX %s: %pV\n", s->name, &vaf);
644 va_end(args);
645 }
646
647 static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
648 {
649 unsigned int off;
650 u8 *addr = page_address(page);
651
652 print_tracking(s, p);
653
654 print_page_info(page);
655
656 pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
657 p, p - addr, get_freepointer(s, p));
658
659 if (s->flags & SLAB_RED_ZONE)
660 print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
661 s->red_left_pad);
662 else if (p > addr + 16)
663 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
664
665 print_section(KERN_ERR, "Object ", p,
666 min_t(unsigned int, s->object_size, PAGE_SIZE));
667 if (s->flags & SLAB_RED_ZONE)
668 print_section(KERN_ERR, "Redzone ", p + s->object_size,
669 s->inuse - s->object_size);
670
671 if (s->offset)
672 off = s->offset + sizeof(void *);
673 else
674 off = s->inuse;
675
676 if (s->flags & SLAB_STORE_USER)
677 off += 2 * sizeof(struct track);
678
679 off += kasan_metadata_size(s);
680
681 if (off != size_from_object(s))
682
683 print_section(KERN_ERR, "Padding ", p + off,
684 size_from_object(s) - off);
685
686 dump_stack();
687 }
688
689 void object_err(struct kmem_cache *s, struct page *page,
690 u8 *object, char *reason)
691 {
692 slab_bug(s, "%s", reason);
693 print_trailer(s, page, object);
694 }
695
696 static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
697 const char *fmt, ...)
698 {
699 va_list args;
700 char buf[100];
701
702 va_start(args, fmt);
703 vsnprintf(buf, sizeof(buf), fmt, args);
704 va_end(args);
705 slab_bug(s, "%s", buf);
706 print_page_info(page);
707 dump_stack();
708 }
709
710 static void init_object(struct kmem_cache *s, void *object, u8 val)
711 {
712 u8 *p = object;
713
714 if (s->flags & SLAB_RED_ZONE)
715 memset(p - s->red_left_pad, val, s->red_left_pad);
716
717 if (s->flags & __OBJECT_POISON) {
718 memset(p, POISON_FREE, s->object_size - 1);
719 p[s->object_size - 1] = POISON_END;
720 }
721
722 if (s->flags & SLAB_RED_ZONE)
723 memset(p + s->object_size, val, s->inuse - s->object_size);
724 }
725
726 static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
727 void *from, void *to)
728 {
729 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
730 memset(from, data, to - from);
731 }
732
733 static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
734 u8 *object, char *what,
735 u8 *start, unsigned int value, unsigned int bytes)
736 {
737 u8 *fault;
738 u8 *end;
739
740 metadata_access_enable();
741 fault = memchr_inv(start, value, bytes);
742 metadata_access_disable();
743 if (!fault)
744 return 1;
745
746 end = start + bytes;
747 while (end > fault && end[-1] == value)
748 end--;
749
750 slab_bug(s, "%s overwritten", what);
751 pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
752 fault, end - 1, fault[0], value);
753 print_trailer(s, page, object);
754
755 restore_bytes(s, what, value, fault, end);
756 return 0;
757 }
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
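/*
 * Debug object layout checked by the functions below:
 *
 *   object - s->red_left_pad   left redzone (SLAB_RED_ZONE)
 *   object                     the object itself, poisoned while free
 *   object + s->object_size    right redzone / alignment fill up to s->inuse
 *   object + s->inuse          free pointer (when it cannot overlay the
 *                              object), then two struct track entries when
 *                              SLAB_STORE_USER is set, then KASAN metadata
 *   up to object + s->size     remaining bytes are padding filled with
 *                              POISON_INUSE
 */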
797 static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
798 {
799 unsigned long off = s->inuse;
800
801 if (s->offset)
802
803 off += sizeof(void *);
804
805 if (s->flags & SLAB_STORE_USER)
806
807 off += 2 * sizeof(struct track);
808
809 off += kasan_metadata_size(s);
810
811 if (size_from_object(s) == off)
812 return 1;
813
814 return check_bytes_and_report(s, page, p, "Object padding",
815 p + off, POISON_INUSE, size_from_object(s) - off);
816 }
817
818
819 static int slab_pad_check(struct kmem_cache *s, struct page *page)
820 {
821 u8 *start;
822 u8 *fault;
823 u8 *end;
824 u8 *pad;
825 int length;
826 int remainder;
827
828 if (!(s->flags & SLAB_POISON))
829 return 1;
830
831 start = page_address(page);
832 length = page_size(page);
833 end = start + length;
834 remainder = length % s->size;
835 if (!remainder)
836 return 1;
837
838 pad = end - remainder;
839 metadata_access_enable();
840 fault = memchr_inv(pad, POISON_INUSE, remainder);
841 metadata_access_disable();
842 if (!fault)
843 return 1;
844 while (end > fault && end[-1] == POISON_INUSE)
845 end--;
846
847 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
848 print_section(KERN_ERR, "Padding ", pad, remainder);
849
850 restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
851 return 0;
852 }
853
854 static int check_object(struct kmem_cache *s, struct page *page,
855 void *object, u8 val)
856 {
857 u8 *p = object;
858 u8 *endobject = object + s->object_size;
859
860 if (s->flags & SLAB_RED_ZONE) {
861 if (!check_bytes_and_report(s, page, object, "Redzone",
862 object - s->red_left_pad, val, s->red_left_pad))
863 return 0;
864
865 if (!check_bytes_and_report(s, page, object, "Redzone",
866 endobject, val, s->inuse - s->object_size))
867 return 0;
868 } else {
869 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
870 check_bytes_and_report(s, page, p, "Alignment padding",
871 endobject, POISON_INUSE,
872 s->inuse - s->object_size);
873 }
874 }
875
876 if (s->flags & SLAB_POISON) {
877 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
878 (!check_bytes_and_report(s, page, p, "Poison", p,
879 POISON_FREE, s->object_size - 1) ||
880 !check_bytes_and_report(s, page, p, "Poison",
881 p + s->object_size - 1, POISON_END, 1)))
882 return 0;
883
884
885
886 check_pad_bytes(s, page, p);
887 }
888
889 if (!s->offset && val == SLUB_RED_ACTIVE)
890
891
892
893
894 return 1;
895
896
897 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
898 object_err(s, page, p, "Freepointer corrupt");
899
900
901
902
903
904 set_freepointer(s, p, NULL);
905 return 0;
906 }
907 return 1;
908 }
909
910 static int check_slab(struct kmem_cache *s, struct page *page)
911 {
912 int maxobj;
913
914 VM_BUG_ON(!irqs_disabled());
915
916 if (!PageSlab(page)) {
917 slab_err(s, page, "Not a valid slab page");
918 return 0;
919 }
920
921 maxobj = order_objects(compound_order(page), s->size);
922 if (page->objects > maxobj) {
923 slab_err(s, page, "objects %u > max %u",
924 page->objects, maxobj);
925 return 0;
926 }
927 if (page->inuse > page->objects) {
928 slab_err(s, page, "inuse %u > max %u",
929 page->inuse, page->objects);
930 return 0;
931 }
932
933 slab_pad_check(s, page);
934 return 1;
935 }
936
937
938
939
940
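/*
 * Walk the slab's freelist, validating every pointer.  Corrupt entries are
 * reported and repaired (by truncating the chain or clearing the whole
 * freelist), and page->objects/page->inuse are adjusted if they disagree
 * with what was counted.  Returns true if @search was found on the list;
 * when called with search == NULL it returns true once the walk completes.
 */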
941 static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
942 {
943 int nr = 0;
944 void *fp;
945 void *object = NULL;
946 int max_objects;
947
948 fp = page->freelist;
949 while (fp && nr <= page->objects) {
950 if (fp == search)
951 return 1;
952 if (!check_valid_pointer(s, page, fp)) {
953 if (object) {
954 object_err(s, page, object,
955 "Freechain corrupt");
956 set_freepointer(s, object, NULL);
957 } else {
958 slab_err(s, page, "Freepointer corrupt");
959 page->freelist = NULL;
960 page->inuse = page->objects;
961 slab_fix(s, "Freelist cleared");
962 return 0;
963 }
964 break;
965 }
966 object = fp;
967 fp = get_freepointer(s, object);
968 nr++;
969 }
970
971 max_objects = order_objects(compound_order(page), s->size);
972 if (max_objects > MAX_OBJS_PER_PAGE)
973 max_objects = MAX_OBJS_PER_PAGE;
974
975 if (page->objects != max_objects) {
976 slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
977 page->objects, max_objects);
978 page->objects = max_objects;
979 slab_fix(s, "Number of objects adjusted.");
980 }
981 if (page->inuse != page->objects - nr) {
982 slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
983 page->inuse, page->objects - nr);
984 page->inuse = page->objects - nr;
985 slab_fix(s, "Object count adjusted.");
986 }
987 return search == NULL;
988 }
989
990 static void trace(struct kmem_cache *s, struct page *page, void *object,
991 int alloc)
992 {
993 if (s->flags & SLAB_TRACE) {
994 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
995 s->name,
996 alloc ? "alloc" : "free",
997 object, page->inuse,
998 page->freelist);
999
1000 if (!alloc)
1001 print_section(KERN_INFO, "Object ", (void *)object,
1002 s->object_size);
1003
1004 dump_stack();
1005 }
1006 }
1007
1008
1009
1010
1011 static void add_full(struct kmem_cache *s,
1012 struct kmem_cache_node *n, struct page *page)
1013 {
1014 if (!(s->flags & SLAB_STORE_USER))
1015 return;
1016
1017 lockdep_assert_held(&n->list_lock);
1018 list_add(&page->slab_list, &n->full);
1019 }
1020
1021 static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1022 {
1023 if (!(s->flags & SLAB_STORE_USER))
1024 return;
1025
1026 lockdep_assert_held(&n->list_lock);
1027 list_del(&page->slab_list);
1028 }
1029
1030
1031 static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1032 {
1033 struct kmem_cache_node *n = get_node(s, node);
1034
1035 return atomic_long_read(&n->nr_slabs);
1036 }
1037
1038 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1039 {
1040 return atomic_long_read(&n->nr_slabs);
1041 }
1042
1043 static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1044 {
1045 struct kmem_cache_node *n = get_node(s, node);
1046
1047
1048
1049
1050
1051
1052
1053 if (likely(n)) {
1054 atomic_long_inc(&n->nr_slabs);
1055 atomic_long_add(objects, &n->total_objects);
1056 }
1057 }
1058 static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1059 {
1060 struct kmem_cache_node *n = get_node(s, node);
1061
1062 atomic_long_dec(&n->nr_slabs);
1063 atomic_long_sub(objects, &n->total_objects);
1064 }
1065
1066
1067 static void setup_object_debug(struct kmem_cache *s, struct page *page,
1068 void *object)
1069 {
1070 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1071 return;
1072
1073 init_object(s, object, SLUB_RED_INACTIVE);
1074 init_tracking(s, object);
1075 }
1076
1077 static
1078 void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
1079 {
1080 if (!(s->flags & SLAB_POISON))
1081 return;
1082
1083 metadata_access_enable();
1084 memset(addr, POISON_INUSE, page_size(page));
1085 metadata_access_disable();
1086 }
1087
1088 static inline int alloc_consistency_checks(struct kmem_cache *s,
1089 struct page *page, void *object)
1090 {
1091 if (!check_slab(s, page))
1092 return 0;
1093
1094 if (!check_valid_pointer(s, page, object)) {
1095 object_err(s, page, object, "Freelist Pointer check fails");
1096 return 0;
1097 }
1098
1099 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1100 return 0;
1101
1102 return 1;
1103 }
1104
1105 static noinline int alloc_debug_processing(struct kmem_cache *s,
1106 struct page *page,
1107 void *object, unsigned long addr)
1108 {
1109 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1110 if (!alloc_consistency_checks(s, page, object))
1111 goto bad;
1112 }
1113
1114
1115 if (s->flags & SLAB_STORE_USER)
1116 set_track(s, object, TRACK_ALLOC, addr);
1117 trace(s, page, object, 1);
1118 init_object(s, object, SLUB_RED_ACTIVE);
1119 return 1;
1120
1121 bad:
1122 if (PageSlab(page)) {
1123
1124
1125
1126
1127
1128 slab_fix(s, "Marking all objects used");
1129 page->inuse = page->objects;
1130 page->freelist = NULL;
1131 }
1132 return 0;
1133 }
1134
1135 static inline int free_consistency_checks(struct kmem_cache *s,
1136 struct page *page, void *object, unsigned long addr)
1137 {
1138 if (!check_valid_pointer(s, page, object)) {
1139 slab_err(s, page, "Invalid object pointer 0x%p", object);
1140 return 0;
1141 }
1142
1143 if (on_freelist(s, page, object)) {
1144 object_err(s, page, object, "Object already free");
1145 return 0;
1146 }
1147
1148 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1149 return 0;
1150
1151 if (unlikely(s != page->slab_cache)) {
1152 if (!PageSlab(page)) {
1153 slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
1154 object);
1155 } else if (!page->slab_cache) {
1156 pr_err("SLUB <none>: no slab for object 0x%p.\n",
1157 object);
1158 dump_stack();
1159 } else
1160 object_err(s, page, object,
1161 "page slab pointer corrupt.");
1162 return 0;
1163 }
1164 return 1;
1165 }
1166
1167
1168 static noinline int free_debug_processing(
1169 struct kmem_cache *s, struct page *page,
1170 void *head, void *tail, int bulk_cnt,
1171 unsigned long addr)
1172 {
1173 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1174 void *object = head;
1175 int cnt = 0;
1176 unsigned long uninitialized_var(flags);
1177 int ret = 0;
1178
1179 spin_lock_irqsave(&n->list_lock, flags);
1180 slab_lock(page);
1181
1182 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1183 if (!check_slab(s, page))
1184 goto out;
1185 }
1186
1187 next_object:
1188 cnt++;
1189
1190 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1191 if (!free_consistency_checks(s, page, object, addr))
1192 goto out;
1193 }
1194
1195 if (s->flags & SLAB_STORE_USER)
1196 set_track(s, object, TRACK_FREE, addr);
1197 trace(s, page, object, 0);
1198
1199 init_object(s, object, SLUB_RED_INACTIVE);
1200
1201
1202 if (object != tail) {
1203 object = get_freepointer(s, object);
1204 goto next_object;
1205 }
1206 ret = 1;
1207
1208 out:
1209 if (cnt != bulk_cnt)
1210 slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1211 bulk_cnt, cnt);
1212
1213 slab_unlock(page);
1214 spin_unlock_irqrestore(&n->list_lock, flags);
1215 if (!ret)
1216 slab_fix(s, "Object at 0x%p not freed", object);
1217 return ret;
1218 }
1219
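/*
 * Parse the slub_debug= kernel parameter.  The option letters map to debug
 * flags: F = consistency checks, Z = red zoning, P = poisoning, U = user
 * (alloc/free) tracking, T = tracing, A = failslab, and O disables
 * higher-order debugging for caches where it would raise the minimum
 * order.  An optional ",cache1,cache2,..." suffix restricts debugging to
 * the named caches; "slub_debug" with no options enables everything.
 */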
1220 static int __init setup_slub_debug(char *str)
1221 {
1222 slub_debug = DEBUG_DEFAULT_FLAGS;
1223 if (*str++ != '=' || !*str)
1224
1225
1226
1227 goto out;
1228
1229 if (*str == ',')
1230
1231
1232
1233
1234 goto check_slabs;
1235
1236 slub_debug = 0;
1237 if (*str == '-')
1238
1239
1240
1241 goto out;
1242
1243
1244
1245
1246 for (; *str && *str != ','; str++) {
1247 switch (tolower(*str)) {
1248 case 'f':
1249 slub_debug |= SLAB_CONSISTENCY_CHECKS;
1250 break;
1251 case 'z':
1252 slub_debug |= SLAB_RED_ZONE;
1253 break;
1254 case 'p':
1255 slub_debug |= SLAB_POISON;
1256 break;
1257 case 'u':
1258 slub_debug |= SLAB_STORE_USER;
1259 break;
1260 case 't':
1261 slub_debug |= SLAB_TRACE;
1262 break;
1263 case 'a':
1264 slub_debug |= SLAB_FAILSLAB;
1265 break;
1266 case 'o':
1267
1268
1269
1270
1271 disable_higher_order_debug = 1;
1272 break;
1273 default:
1274 pr_err("slub_debug option '%c' unknown. skipped\n",
1275 *str);
1276 }
1277 }
1278
1279 check_slabs:
1280 if (*str == ',')
1281 slub_debug_slabs = str + 1;
1282 out:
1283 if ((static_branch_unlikely(&init_on_alloc) ||
1284 static_branch_unlikely(&init_on_free)) &&
1285 (slub_debug & SLAB_POISON))
1286 pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
1287 return 1;
1288 }
1289
1290 __setup("slub_debug", setup_slub_debug);
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304 slab_flags_t kmem_cache_flags(unsigned int object_size,
1305 slab_flags_t flags, const char *name,
1306 void (*ctor)(void *))
1307 {
1308 char *iter;
1309 size_t len;
1310
1311
1312 if (!slub_debug_slabs)
1313 return flags | slub_debug;
1314
1315 len = strlen(name);
1316 iter = slub_debug_slabs;
1317 while (*iter) {
1318 char *end, *glob;
1319 size_t cmplen;
1320
1321 end = strchrnul(iter, ',');
1322
1323 glob = strnchr(iter, end - iter, '*');
1324 if (glob)
1325 cmplen = glob - iter;
1326 else
1327 cmplen = max_t(size_t, len, (end - iter));
1328
1329 if (!strncmp(name, iter, cmplen)) {
1330 flags |= slub_debug;
1331 break;
1332 }
1333
1334 if (!*end)
1335 break;
1336 iter = end + 1;
1337 }
1338
1339 return flags;
1340 }
1341 #else
1342 static inline void setup_object_debug(struct kmem_cache *s,
1343 struct page *page, void *object) {}
1344 static inline
1345 void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
1346
1347 static inline int alloc_debug_processing(struct kmem_cache *s,
1348 struct page *page, void *object, unsigned long addr) { return 0; }
1349
1350 static inline int free_debug_processing(
1351 struct kmem_cache *s, struct page *page,
1352 void *head, void *tail, int bulk_cnt,
1353 unsigned long addr) { return 0; }
1354
1355 static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1356 { return 1; }
1357 static inline int check_object(struct kmem_cache *s, struct page *page,
1358 void *object, u8 val) { return 1; }
1359 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1360 struct page *page) {}
1361 static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1362 struct page *page) {}
1363 slab_flags_t kmem_cache_flags(unsigned int object_size,
1364 slab_flags_t flags, const char *name,
1365 void (*ctor)(void *))
1366 {
1367 return flags;
1368 }
1369 #define slub_debug 0
1370
1371 #define disable_higher_order_debug 0
1372
1373 static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1374 { return 0; }
1375 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1376 { return 0; }
1377 static inline void inc_slabs_node(struct kmem_cache *s, int node,
1378 int objects) {}
1379 static inline void dec_slabs_node(struct kmem_cache *s, int node,
1380 int objects) {}
1381
1382 #endif
1383
1384
1385
1386
1387
1388 static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1389 {
1390 ptr = kasan_kmalloc_large(ptr, size, flags);
1391
1392 kmemleak_alloc(ptr, size, 1, flags);
1393 return ptr;
1394 }
1395
1396 static __always_inline void kfree_hook(void *x)
1397 {
1398 kmemleak_free(x);
1399 kasan_kfree_large(x, _RET_IP_);
1400 }
1401
1402 static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
1403 {
1404 kmemleak_free_recursive(x, s->flags);
1405
1406
1407
1408
1409
1410
1411 #ifdef CONFIG_LOCKDEP
1412 {
1413 unsigned long flags;
1414
1415 local_irq_save(flags);
1416 debug_check_no_locks_freed(x, s->object_size);
1417 local_irq_restore(flags);
1418 }
1419 #endif
1420 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1421 debug_check_no_obj_freed(x, s->object_size);
1422
1423
1424 return kasan_slab_free(s, x, _RET_IP_);
1425 }
1426
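/*
 * Run the free hooks on every object of a detached freelist and rebuild
 * the list with only the objects that should actually be returned to the
 * slab (objects absorbed by the KASAN quarantine are dropped).  Also
 * performs the init_on_free wiping of the object and its unused metadata
 * area.  Returns true if anything is left to free.
 */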
1427 static inline bool slab_free_freelist_hook(struct kmem_cache *s,
1428 void **head, void **tail)
1429 {
1430
1431 void *object;
1432 void *next = *head;
1433 void *old_tail = *tail ? *tail : *head;
1434 int rsize;
1435
1436
1437 *head = NULL;
1438 *tail = NULL;
1439
1440 do {
1441 object = next;
1442 next = get_freepointer(s, object);
1443
1444 if (slab_want_init_on_free(s)) {
1445
1446
1447
1448
1449 memset(object, 0, s->object_size);
1450 rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad
1451 : 0;
1452 memset((char *)object + s->inuse, 0,
1453 s->size - s->inuse - rsize);
1454
1455 }
1456
1457 if (!slab_free_hook(s, object)) {
1458
1459 set_freepointer(s, object, *head);
1460 *head = object;
1461 if (!*tail)
1462 *tail = object;
1463 }
1464 } while (object != old_tail);
1465
1466 if (*head == *tail)
1467 *tail = NULL;
1468
1469 return *head != NULL;
1470 }
1471
1472 static void *setup_object(struct kmem_cache *s, struct page *page,
1473 void *object)
1474 {
1475 setup_object_debug(s, page, object);
1476 object = kasan_init_slab_obj(s, object);
1477 if (unlikely(s->ctor)) {
1478 kasan_unpoison_object_data(s, object);
1479 s->ctor(object);
1480 kasan_poison_object_data(s, object);
1481 }
1482 return object;
1483 }
1484
1485
1486
1487
1488 static inline struct page *alloc_slab_page(struct kmem_cache *s,
1489 gfp_t flags, int node, struct kmem_cache_order_objects oo)
1490 {
1491 struct page *page;
1492 unsigned int order = oo_order(oo);
1493
1494 if (node == NUMA_NO_NODE)
1495 page = alloc_pages(flags, order);
1496 else
1497 page = __alloc_pages_node(node, flags, order);
1498
1499 if (page && charge_slab_page(page, flags, order, s)) {
1500 __free_pages(page, order);
1501 page = NULL;
1502 }
1503
1504 return page;
1505 }
1506
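/*
 * Freelist randomization: each cache pre-computes a random permutation of
 * object indices (scaled to byte offsets), and new slabs link their free
 * objects in that shuffled order, making heap layouts harder to predict.
 */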
1507 #ifdef CONFIG_SLAB_FREELIST_RANDOM
1508
1509 static int init_cache_random_seq(struct kmem_cache *s)
1510 {
1511 unsigned int count = oo_objects(s->oo);
1512 int err;
1513
1514
1515 if (s->random_seq)
1516 return 0;
1517
1518 err = cache_random_seq_create(s, count, GFP_KERNEL);
1519 if (err) {
1520 pr_err("SLUB: Unable to initialize free list for %s\n",
1521 s->name);
1522 return err;
1523 }
1524
1525
1526 if (s->random_seq) {
1527 unsigned int i;
1528
1529 for (i = 0; i < count; i++)
1530 s->random_seq[i] *= s->size;
1531 }
1532 return 0;
1533 }
1534
1535
1536 static void __init init_freelist_randomization(void)
1537 {
1538 struct kmem_cache *s;
1539
1540 mutex_lock(&slab_mutex);
1541
1542 list_for_each_entry(s, &slab_caches, list)
1543 init_cache_random_seq(s);
1544
1545 mutex_unlock(&slab_mutex);
1546 }
1547
1548
1549 static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
1550 unsigned long *pos, void *start,
1551 unsigned long page_limit,
1552 unsigned long freelist_count)
1553 {
1554 unsigned int idx;
1555
1556
1557
1558
1559
1560 do {
1561 idx = s->random_seq[*pos];
1562 *pos += 1;
1563 if (*pos >= freelist_count)
1564 *pos = 0;
1565 } while (unlikely(idx >= page_limit));
1566
1567 return (char *)start + idx;
1568 }
1569
1570
1571 static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1572 {
1573 void *start;
1574 void *cur;
1575 void *next;
1576 unsigned long idx, pos, page_limit, freelist_count;
1577
1578 if (page->objects < 2 || !s->random_seq)
1579 return false;
1580
1581 freelist_count = oo_objects(s->oo);
1582 pos = get_random_int() % freelist_count;
1583
1584 page_limit = page->objects * s->size;
1585 start = fixup_red_left(s, page_address(page));
1586
1587
1588 cur = next_freelist_entry(s, page, &pos, start, page_limit,
1589 freelist_count);
1590 cur = setup_object(s, page, cur);
1591 page->freelist = cur;
1592
1593 for (idx = 1; idx < page->objects; idx++) {
1594 next = next_freelist_entry(s, page, &pos, start, page_limit,
1595 freelist_count);
1596 next = setup_object(s, page, next);
1597 set_freepointer(s, cur, next);
1598 cur = next;
1599 }
1600 set_freepointer(s, cur, NULL);
1601
1602 return true;
1603 }
1604 #else
1605 static inline int init_cache_random_seq(struct kmem_cache *s)
1606 {
1607 return 0;
1608 }
1609 static inline void init_freelist_randomization(void) { }
1610 static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1611 {
1612 return false;
1613 }
1614 #endif
1615
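/*
 * Allocate and initialize a new slab.  The preferred order (s->oo) is
 * tried first with relaxed GFP flags so that a large allocation can fail
 * quietly under memory pressure; on failure the minimum order (s->min) is
 * used.  The page is then marked as a slab, poisoned if requested, and its
 * freelist is built either sequentially or in shuffled order.
 */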
1616 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1617 {
1618 struct page *page;
1619 struct kmem_cache_order_objects oo = s->oo;
1620 gfp_t alloc_gfp;
1621 void *start, *p, *next;
1622 int idx;
1623 bool shuffle;
1624
1625 flags &= gfp_allowed_mask;
1626
1627 if (gfpflags_allow_blocking(flags))
1628 local_irq_enable();
1629
1630 flags |= s->allocflags;
1631
1632
1633
1634
1635
1636 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1637 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1638 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
1639
1640 page = alloc_slab_page(s, alloc_gfp, node, oo);
1641 if (unlikely(!page)) {
1642 oo = s->min;
1643 alloc_gfp = flags;
1644
1645
1646
1647
1648 page = alloc_slab_page(s, alloc_gfp, node, oo);
1649 if (unlikely(!page))
1650 goto out;
1651 stat(s, ORDER_FALLBACK);
1652 }
1653
1654 page->objects = oo_objects(oo);
1655
1656 page->slab_cache = s;
1657 __SetPageSlab(page);
1658 if (page_is_pfmemalloc(page))
1659 SetPageSlabPfmemalloc(page);
1660
1661 kasan_poison_slab(page);
1662
1663 start = page_address(page);
1664
1665 setup_page_debug(s, page, start);
1666
1667 shuffle = shuffle_freelist(s, page);
1668
1669 if (!shuffle) {
1670 start = fixup_red_left(s, start);
1671 start = setup_object(s, page, start);
1672 page->freelist = start;
1673 for (idx = 0, p = start; idx < page->objects - 1; idx++) {
1674 next = p + s->size;
1675 next = setup_object(s, page, next);
1676 set_freepointer(s, p, next);
1677 p = next;
1678 }
1679 set_freepointer(s, p, NULL);
1680 }
1681
1682 page->inuse = page->objects;
1683 page->frozen = 1;
1684
1685 out:
1686 if (gfpflags_allow_blocking(flags))
1687 local_irq_disable();
1688 if (!page)
1689 return NULL;
1690
1691 inc_slabs_node(s, page_to_nid(page), page->objects);
1692
1693 return page;
1694 }
1695
1696 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1697 {
1698 if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
1699 gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
1700 flags &= ~GFP_SLAB_BUG_MASK;
1701 pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
1702 invalid_mask, &invalid_mask, flags, &flags);
1703 dump_stack();
1704 }
1705
1706 return allocate_slab(s,
1707 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1708 }
1709
1710 static void __free_slab(struct kmem_cache *s, struct page *page)
1711 {
1712 int order = compound_order(page);
1713 int pages = 1 << order;
1714
1715 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1716 void *p;
1717
1718 slab_pad_check(s, page);
1719 for_each_object(p, s, page_address(page),
1720 page->objects)
1721 check_object(s, page, p, SLUB_RED_INACTIVE);
1722 }
1723
1724 __ClearPageSlabPfmemalloc(page);
1725 __ClearPageSlab(page);
1726
1727 page->mapping = NULL;
1728 if (current->reclaim_state)
1729 current->reclaim_state->reclaimed_slab += pages;
1730 uncharge_slab_page(page, order, s);
1731 __free_pages(page, order);
1732 }
1733
1734 static void rcu_free_slab(struct rcu_head *h)
1735 {
1736 struct page *page = container_of(h, struct page, rcu_head);
1737
1738 __free_slab(page->slab_cache, page);
1739 }
1740
1741 static void free_slab(struct kmem_cache *s, struct page *page)
1742 {
1743 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
1744 call_rcu(&page->rcu_head, rcu_free_slab);
1745 } else
1746 __free_slab(s, page);
1747 }
1748
1749 static void discard_slab(struct kmem_cache *s, struct page *page)
1750 {
1751 dec_slabs_node(s, page_to_nid(page), page->objects);
1752 free_slab(s, page);
1753 }
1754
1755
1756
1757
1758 static inline void
1759 __add_partial(struct kmem_cache_node *n, struct page *page, int tail)
1760 {
1761 n->nr_partial++;
1762 if (tail == DEACTIVATE_TO_TAIL)
1763 list_add_tail(&page->slab_list, &n->partial);
1764 else
1765 list_add(&page->slab_list, &n->partial);
1766 }
1767
1768 static inline void add_partial(struct kmem_cache_node *n,
1769 struct page *page, int tail)
1770 {
1771 lockdep_assert_held(&n->list_lock);
1772 __add_partial(n, page, tail);
1773 }
1774
1775 static inline void remove_partial(struct kmem_cache_node *n,
1776 struct page *page)
1777 {
1778 lockdep_assert_held(&n->list_lock);
1779 list_del(&page->slab_list);
1780 n->nr_partial--;
1781 }
1782
1783
1784
1785
1786
1787
1788
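/*
 * Remove a slab from the node partial list and freeze it, atomically
 * taking ownership of its freelist.  In "mode" (i.e. when it will become
 * the new cpu slab) the whole freelist is claimed; otherwise the slab is
 * frozen with its freelist intact so it can be parked on the per-cpu
 * partial list.  Returns the freelist, or NULL if the cmpxchg lost a race.
 */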
1789 static inline void *acquire_slab(struct kmem_cache *s,
1790 struct kmem_cache_node *n, struct page *page,
1791 int mode, int *objects)
1792 {
1793 void *freelist;
1794 unsigned long counters;
1795 struct page new;
1796
1797 lockdep_assert_held(&n->list_lock);
1798
1799
1800
1801
1802
1803
1804 freelist = page->freelist;
1805 counters = page->counters;
1806 new.counters = counters;
1807 *objects = new.objects - new.inuse;
1808 if (mode) {
1809 new.inuse = page->objects;
1810 new.freelist = NULL;
1811 } else {
1812 new.freelist = freelist;
1813 }
1814
1815 VM_BUG_ON(new.frozen);
1816 new.frozen = 1;
1817
1818 if (!__cmpxchg_double_slab(s, page,
1819 freelist, counters,
1820 new.freelist, new.counters,
1821 "acquire_slab"))
1822 return NULL;
1823
1824 remove_partial(n, page);
1825 WARN_ON(!freelist);
1826 return freelist;
1827 }
1828
1829 static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1830 static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
1831
1832
1833
1834
1835 static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1836 struct kmem_cache_cpu *c, gfp_t flags)
1837 {
1838 struct page *page, *page2;
1839 void *object = NULL;
1840 unsigned int available = 0;
1841 int objects;
1842
1843
1844
1845
1846
1847
1848
1849 if (!n || !n->nr_partial)
1850 return NULL;
1851
1852 spin_lock(&n->list_lock);
1853 list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
1854 void *t;
1855
1856 if (!pfmemalloc_match(page, flags))
1857 continue;
1858
1859 t = acquire_slab(s, n, page, object == NULL, &objects);
1860 if (!t)
1861 break;
1862
1863 available += objects;
1864 if (!object) {
1865 c->page = page;
1866 stat(s, ALLOC_FROM_PARTIAL);
1867 object = t;
1868 } else {
1869 put_cpu_partial(s, page, 0);
1870 stat(s, CPU_PARTIAL_NODE);
1871 }
1872 if (!kmem_cache_has_cpu_partial(s)
1873 || available > slub_cpu_partial(s) / 2)
1874 break;
1875
1876 }
1877 spin_unlock(&n->list_lock);
1878 return object;
1879 }
1880
1881
1882
1883
1884 static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1885 struct kmem_cache_cpu *c)
1886 {
1887 #ifdef CONFIG_NUMA
1888 struct zonelist *zonelist;
1889 struct zoneref *z;
1890 struct zone *zone;
1891 enum zone_type high_zoneidx = gfp_zone(flags);
1892 void *object;
1893 unsigned int cpuset_mems_cookie;
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913 if (!s->remote_node_defrag_ratio ||
1914 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1915 return NULL;
1916
1917 do {
1918 cpuset_mems_cookie = read_mems_allowed_begin();
1919 zonelist = node_zonelist(mempolicy_slab_node(), flags);
1920 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1921 struct kmem_cache_node *n;
1922
1923 n = get_node(s, zone_to_nid(zone));
1924
1925 if (n && cpuset_zone_allowed(zone, flags) &&
1926 n->nr_partial > s->min_partial) {
1927 object = get_partial_node(s, n, c, flags);
1928 if (object) {
1929
1930
1931
1932
1933
1934
1935
1936 return object;
1937 }
1938 }
1939 }
1940 } while (read_mems_allowed_retry(cpuset_mems_cookie));
1941 #endif
1942 return NULL;
1943 }
1944
1945
1946
1947
1948 static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1949 struct kmem_cache_cpu *c)
1950 {
1951 void *object;
1952 int searchnode = node;
1953
1954 if (node == NUMA_NO_NODE)
1955 searchnode = numa_mem_id();
1956
1957 object = get_partial_node(s, get_node(s, searchnode), c, flags);
1958 if (object || node != NUMA_NO_NODE)
1959 return object;
1960
1961 return get_any_partial(s, flags, c);
1962 }
1963
1964 #ifdef CONFIG_PREEMPT
1965
1966
1967
1968
1969
1970 #define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
1971 #else
1972
1973
1974
1975
1976 #define TID_STEP 1
1977 #endif
1978
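/*
 * Each cpu keeps a transaction id (tid) that the lockless fastpaths use to
 * detect being moved to another cpu or preempted between reading the
 * per-cpu state and the final this_cpu_cmpxchg.  The tid advances by
 * TID_STEP so that, with preemption enabled, its low bits always encode
 * the owning cpu.
 */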
1979 static inline unsigned long next_tid(unsigned long tid)
1980 {
1981 return tid + TID_STEP;
1982 }
1983
1984 #ifdef SLUB_DEBUG_CMPXCHG
1985 static inline unsigned int tid_to_cpu(unsigned long tid)
1986 {
1987 return tid % TID_STEP;
1988 }
1989
1990 static inline unsigned long tid_to_event(unsigned long tid)
1991 {
1992 return tid / TID_STEP;
1993 }
1994 #endif
1995
1996 static inline unsigned int init_tid(int cpu)
1997 {
1998 return cpu;
1999 }
2000
2001 static inline void note_cmpxchg_failure(const char *n,
2002 const struct kmem_cache *s, unsigned long tid)
2003 {
2004 #ifdef SLUB_DEBUG_CMPXCHG
2005 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
2006
2007 pr_info("%s %s: cmpxchg redo ", n, s->name);
2008
2009 #ifdef CONFIG_PREEMPT
2010 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
2011 pr_warn("due to cpu change %d -> %d\n",
2012 tid_to_cpu(tid), tid_to_cpu(actual_tid));
2013 else
2014 #endif
2015 if (tid_to_event(tid) != tid_to_event(actual_tid))
2016 pr_warn("due to cpu running other code. Event %ld->%ld\n",
2017 tid_to_event(tid), tid_to_event(actual_tid));
2018 else
2019 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
2020 actual_tid, tid, next_tid(tid));
2021 #endif
2022 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
2023 }
2024
2025 static void init_kmem_cache_cpus(struct kmem_cache *s)
2026 {
2027 int cpu;
2028
2029 for_each_possible_cpu(cpu)
2030 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
2031 }
2032
2033
2034
2035
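/*
 * Detach the cpu slab: drain the remaining per-cpu freelist back onto the
 * page, unfreeze the page, and file it on the node's partial or full list
 * (or free it entirely if it became empty and the node already has enough
 * partial slabs).  All transitions use cmpxchg_double retry loops since
 * remote cpus may be freeing into the page concurrently.
 */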
2036 static void deactivate_slab(struct kmem_cache *s, struct page *page,
2037 void *freelist, struct kmem_cache_cpu *c)
2038 {
2039 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
2040 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
2041 int lock = 0;
2042 enum slab_modes l = M_NONE, m = M_NONE;
2043 void *nextfree;
2044 int tail = DEACTIVATE_TO_HEAD;
2045 struct page new;
2046 struct page old;
2047
2048 if (page->freelist) {
2049 stat(s, DEACTIVATE_REMOTE_FREES);
2050 tail = DEACTIVATE_TO_TAIL;
2051 }
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061 while (freelist && (nextfree = get_freepointer(s, freelist))) {
2062 void *prior;
2063 unsigned long counters;
2064
2065 do {
2066 prior = page->freelist;
2067 counters = page->counters;
2068 set_freepointer(s, freelist, prior);
2069 new.counters = counters;
2070 new.inuse--;
2071 VM_BUG_ON(!new.frozen);
2072
2073 } while (!__cmpxchg_double_slab(s, page,
2074 prior, counters,
2075 freelist, new.counters,
2076 "drain percpu freelist"));
2077
2078 freelist = nextfree;
2079 }
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095 redo:
2096
2097 old.freelist = page->freelist;
2098 old.counters = page->counters;
2099 VM_BUG_ON(!old.frozen);
2100
2101
2102 new.counters = old.counters;
2103 if (freelist) {
2104 new.inuse--;
2105 set_freepointer(s, freelist, old.freelist);
2106 new.freelist = freelist;
2107 } else
2108 new.freelist = old.freelist;
2109
2110 new.frozen = 0;
2111
2112 if (!new.inuse && n->nr_partial >= s->min_partial)
2113 m = M_FREE;
2114 else if (new.freelist) {
2115 m = M_PARTIAL;
2116 if (!lock) {
2117 lock = 1;
2118
2119
2120
2121
2122
2123 spin_lock(&n->list_lock);
2124 }
2125 } else {
2126 m = M_FULL;
2127 if (kmem_cache_debug(s) && !lock) {
2128 lock = 1;
2129
2130
2131
2132
2133
2134 spin_lock(&n->list_lock);
2135 }
2136 }
2137
2138 if (l != m) {
2139 if (l == M_PARTIAL)
2140 remove_partial(n, page);
2141 else if (l == M_FULL)
2142 remove_full(s, n, page);
2143
2144 if (m == M_PARTIAL)
2145 add_partial(n, page, tail);
2146 else if (m == M_FULL)
2147 add_full(s, n, page);
2148 }
2149
2150 l = m;
2151 if (!__cmpxchg_double_slab(s, page,
2152 old.freelist, old.counters,
2153 new.freelist, new.counters,
2154 "unfreezing slab"))
2155 goto redo;
2156
2157 if (lock)
2158 spin_unlock(&n->list_lock);
2159
2160 if (m == M_PARTIAL)
2161 stat(s, tail);
2162 else if (m == M_FULL)
2163 stat(s, DEACTIVATE_FULL);
2164 else if (m == M_FREE) {
2165 stat(s, DEACTIVATE_EMPTY);
2166 discard_slab(s, page);
2167 stat(s, FREE_SLAB);
2168 }
2169
2170 c->page = NULL;
2171 c->freelist = NULL;
2172 }
2173
2174
2175
2176
2177
2178
2179
2180
2181 static void unfreeze_partials(struct kmem_cache *s,
2182 struct kmem_cache_cpu *c)
2183 {
2184 #ifdef CONFIG_SLUB_CPU_PARTIAL
2185 struct kmem_cache_node *n = NULL, *n2 = NULL;
2186 struct page *page, *discard_page = NULL;
2187
2188 while ((page = c->partial)) {
2189 struct page new;
2190 struct page old;
2191
2192 c->partial = page->next;
2193
2194 n2 = get_node(s, page_to_nid(page));
2195 if (n != n2) {
2196 if (n)
2197 spin_unlock(&n->list_lock);
2198
2199 n = n2;
2200 spin_lock(&n->list_lock);
2201 }
2202
2203 do {
2204
2205 old.freelist = page->freelist;
2206 old.counters = page->counters;
2207 VM_BUG_ON(!old.frozen);
2208
2209 new.counters = old.counters;
2210 new.freelist = old.freelist;
2211
2212 new.frozen = 0;
2213
2214 } while (!__cmpxchg_double_slab(s, page,
2215 old.freelist, old.counters,
2216 new.freelist, new.counters,
2217 "unfreezing slab"));
2218
2219 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2220 page->next = discard_page;
2221 discard_page = page;
2222 } else {
2223 add_partial(n, page, DEACTIVATE_TO_TAIL);
2224 stat(s, FREE_ADD_PARTIAL);
2225 }
2226 }
2227
2228 if (n)
2229 spin_unlock(&n->list_lock);
2230
2231 while (discard_page) {
2232 page = discard_page;
2233 discard_page = discard_page->next;
2234
2235 stat(s, DEACTIVATE_EMPTY);
2236 discard_slab(s, page);
2237 stat(s, FREE_SLAB);
2238 }
2239 #endif
2240 }
2241
2242
2243
2244
2245
2246
2247
2248
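/*
 * Park a frozen slab that still has free objects on the per-cpu partial
 * list.  If draining is allowed and the queued object count exceeds
 * s->cpu_partial, the existing set is first unfrozen onto the node
 * partial lists.
 */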
2249 static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2250 {
2251 #ifdef CONFIG_SLUB_CPU_PARTIAL
2252 struct page *oldpage;
2253 int pages;
2254 int pobjects;
2255
2256 preempt_disable();
2257 do {
2258 pages = 0;
2259 pobjects = 0;
2260 oldpage = this_cpu_read(s->cpu_slab->partial);
2261
2262 if (oldpage) {
2263 pobjects = oldpage->pobjects;
2264 pages = oldpage->pages;
2265 if (drain && pobjects > s->cpu_partial) {
2266 unsigned long flags;
2267
2268
2269
2270
2271 local_irq_save(flags);
2272 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2273 local_irq_restore(flags);
2274 oldpage = NULL;
2275 pobjects = 0;
2276 pages = 0;
2277 stat(s, CPU_PARTIAL_DRAIN);
2278 }
2279 }
2280
2281 pages++;
2282 pobjects += page->objects - page->inuse;
2283
2284 page->pages = pages;
2285 page->pobjects = pobjects;
2286 page->next = oldpage;
2287
2288 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
2289 != oldpage);
2290 if (unlikely(!s->cpu_partial)) {
2291 unsigned long flags;
2292
2293 local_irq_save(flags);
2294 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2295 local_irq_restore(flags);
2296 }
2297 preempt_enable();
2298 #endif
2299 }
2300
2301 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2302 {
2303 stat(s, CPUSLAB_FLUSH);
2304 deactivate_slab(s, c->page, c->freelist, c);
2305
2306 c->tid = next_tid(c->tid);
2307 }
2308
2309
2310
2311
2312
2313
2314 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2315 {
2316 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2317
2318 if (c->page)
2319 flush_slab(s, c);
2320
2321 unfreeze_partials(s, c);
2322 }
2323
2324 static void flush_cpu_slab(void *d)
2325 {
2326 struct kmem_cache *s = d;
2327
2328 __flush_cpu_slab(s, smp_processor_id());
2329 }
2330
2331 static bool has_cpu_slab(int cpu, void *info)
2332 {
2333 struct kmem_cache *s = info;
2334 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2335
2336 return c->page || slub_percpu_partial(c);
2337 }
2338
2339 static void flush_all(struct kmem_cache *s)
2340 {
2341 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2342 }
2343
2344
2345
2346
2347
2348 static int slub_cpu_dead(unsigned int cpu)
2349 {
2350 struct kmem_cache *s;
2351 unsigned long flags;
2352
2353 mutex_lock(&slab_mutex);
2354 list_for_each_entry(s, &slab_caches, list) {
2355 local_irq_save(flags);
2356 __flush_cpu_slab(s, cpu);
2357 local_irq_restore(flags);
2358 }
2359 mutex_unlock(&slab_mutex);
2360 return 0;
2361 }
2362
2363
2364
2365
2366
2367 static inline int node_match(struct page *page, int node)
2368 {
2369 #ifdef CONFIG_NUMA
2370 if (node != NUMA_NO_NODE && page_to_nid(page) != node)
2371 return 0;
2372 #endif
2373 return 1;
2374 }
2375
2376 #ifdef CONFIG_SLUB_DEBUG
2377 static int count_free(struct page *page)
2378 {
2379 return page->objects - page->inuse;
2380 }
2381
2382 static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2383 {
2384 return atomic_long_read(&n->total_objects);
2385 }
2386 #endif
2387
2388 #if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
2389 static unsigned long count_partial(struct kmem_cache_node *n,
2390 int (*get_count)(struct page *))
2391 {
2392 unsigned long flags;
2393 unsigned long x = 0;
2394 struct page *page;
2395
2396 spin_lock_irqsave(&n->list_lock, flags);
2397 list_for_each_entry(page, &n->partial, slab_list)
2398 x += get_count(page);
2399 spin_unlock_irqrestore(&n->list_lock, flags);
2400 return x;
2401 }
2402 #endif
2403
2404 static noinline void
2405 slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2406 {
2407 #ifdef CONFIG_SLUB_DEBUG
2408 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2409 DEFAULT_RATELIMIT_BURST);
2410 int node;
2411 struct kmem_cache_node *n;
2412
2413 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2414 return;
2415
2416 pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2417 nid, gfpflags, &gfpflags);
2418 pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
2419 s->name, s->object_size, s->size, oo_order(s->oo),
2420 oo_order(s->min));
2421
2422 if (oo_order(s->min) > get_order(s->object_size))
2423 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2424 s->name);
2425
2426 for_each_kmem_cache_node(s, node, n) {
2427 unsigned long nr_slabs;
2428 unsigned long nr_objs;
2429 unsigned long nr_free;
2430
2431 nr_free = count_partial(n, count_free);
2432 nr_slabs = node_nr_slabs(n);
2433 nr_objs = node_nr_objs(n);
2434
2435 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2436 node, nr_slabs, nr_objs, nr_free);
2437 }
2438 #endif
2439 }
2440
2441 static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2442 int node, struct kmem_cache_cpu **pc)
2443 {
2444 void *freelist;
2445 struct kmem_cache_cpu *c = *pc;
2446 struct page *page;
2447
2448 WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
2449
2450 freelist = get_partial(s, flags, node, c);
2451
2452 if (freelist)
2453 return freelist;
2454
2455 page = new_slab(s, flags, node);
2456 if (page) {
2457 c = raw_cpu_ptr(s->cpu_slab);
2458 if (c->page)
2459 flush_slab(s, c);
2460
2461 /*
2462  * No other reference to the new page exists yet, so we can
2463  * manipulate its freelist without cmpxchg.
2464  */
2465 freelist = page->freelist;
2466 page->freelist = NULL;
2467
2468 stat(s, ALLOC_SLAB);
2469 c->page = page;
2470 *pc = c;
2471 }
2472
2473 return freelist;
2474 }
2475
2476 static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2477 {
2478 if (unlikely(PageSlabPfmemalloc(page)))
2479 return gfp_pfmemalloc_allowed(gfpflags);
2480
2481 return true;
2482 }
2483
2484 /*
2485  * Check the page->freelist of a page and either transfer the freelist to the
2486  * per cpu freelist or deactivate the page.
2487  *
2488  * The page is still frozen if the return value is not NULL.
2489  *
2490  * If this function returns NULL then the page has been unfrozen.
2491  *
2492  * This function must be called with interrupts disabled.
2493  */
2494 static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2495 {
2496 struct page new;
2497 unsigned long counters;
2498 void *freelist;
2499
2500 do {
2501 freelist = page->freelist;
2502 counters = page->counters;
2503
2504 new.counters = counters;
2505 VM_BUG_ON(!new.frozen);
2506
2507 new.inuse = page->objects;
2508 new.frozen = freelist != NULL;
2509
2510 } while (!__cmpxchg_double_slab(s, page,
2511 freelist, counters,
2512 NULL, new.counters,
2513 "get_freelist"));
2514
2515 return freelist;
2516 }
2517
2518 /*
2519  * Slow path. The lockless freelist is empty or we need to perform
2520  * debugging duties.
2521  *
2522  * Processing is still very fast if new objects have been freed to the
2523  * regular freelist. In that case we simply take over the regular freelist
2524  * as the lockless freelist and zap the regular freelist.
2525  *
2526  * If that is not working then we fall back to the partial lists. We take
2527  * the first element of the freelist as the object to allocate now and move
2528  * the rest of the freelist to the partial list.
2529  *
2530  * And if we were unable to get a new slab from the partial slab lists then
2531  * we need to allocate a new slab. This involves taking off new slabs from
2532  * the partial list or even pages from the page allocator.
2533  *
2534  * This version of __slab_alloc() must be called with interrupts already
2535  * disabled (or when falling back to the slow path from the fast path).
2536  */
2537 static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2538 unsigned long addr, struct kmem_cache_cpu *c)
2539 {
2540 void *freelist;
2541 struct page *page;
2542
2543 page = c->page;
2544 if (!page) {
2545 /*
2546  * If the node is not online or has no normal memory, just
2547  * ignore the node constraint.
2548  */
2549 if (unlikely(node != NUMA_NO_NODE &&
2550 !node_state(node, N_NORMAL_MEMORY)))
2551 node = NUMA_NO_NODE;
2552 goto new_slab;
2553 }
2554 redo:
2555
2556 if (unlikely(!node_match(page, node))) {
2557 /*
2558  * Same as above, but node_match() being false already
2559  * implies node != NUMA_NO_NODE.
2560  */
2561 if (!node_state(node, N_NORMAL_MEMORY)) {
2562 node = NUMA_NO_NODE;
2563 goto redo;
2564 } else {
2565 stat(s, ALLOC_NODE_MISMATCH);
2566 deactivate_slab(s, page, c->freelist, c);
2567 goto new_slab;
2568 }
2569 }
2570
2571 /*
2572  * By rights, we should be searching for a slab page that was
2573  * PFMEMALLOC but right now, we are losing the pfmemalloc
2574  * information when the page leaves the per-cpu allocator.
2575  */
2576 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2577 deactivate_slab(s, page, c->freelist, c);
2578 goto new_slab;
2579 }
2580
2581
2582 freelist = c->freelist;
2583 if (freelist)
2584 goto load_freelist;
2585
2586 freelist = get_freelist(s, page);
2587
2588 if (!freelist) {
2589 c->page = NULL;
2590 stat(s, DEACTIVATE_BYPASS);
2591 goto new_slab;
2592 }
2593
2594 stat(s, ALLOC_REFILL);
2595
2596 load_freelist:
2597 /*
2598  * freelist is pointing to the list of objects to be used.
2599  * page is pointing to the page from which the objects are obtained.
2600  * That page must be frozen for per cpu allocations to work.
2601  */
2602 VM_BUG_ON(!c->page->frozen);
2603 c->freelist = get_freepointer(s, freelist);
2604 c->tid = next_tid(c->tid);
2605 return freelist;
2606
2607 new_slab:
2608
2609 if (slub_percpu_partial(c)) {
2610 page = c->page = slub_percpu_partial(c);
2611 slub_set_percpu_partial(c, page);
2612 stat(s, CPU_PARTIAL_ALLOC);
2613 goto redo;
2614 }
2615
2616 freelist = new_slab_objects(s, gfpflags, node, &c);
2617
2618 if (unlikely(!freelist)) {
2619 slab_out_of_memory(s, gfpflags, node);
2620 return NULL;
2621 }
2622
2623 page = c->page;
2624 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2625 goto load_freelist;
2626
2627
2628 if (kmem_cache_debug(s) &&
2629 !alloc_debug_processing(s, page, freelist, addr))
2630 goto new_slab;
2631
2632 deactivate_slab(s, page, get_freepointer(s, freelist), c);
2633 return freelist;
2634 }
2635
2636 /*
2637  * A wrapper that disables interrupts and compensates for possible
2638  * cpu changes by refetching the per cpu area pointer.
2639  */
2640 static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2641 unsigned long addr, struct kmem_cache_cpu *c)
2642 {
2643 void *p;
2644 unsigned long flags;
2645
2646 local_irq_save(flags);
2647 #ifdef CONFIG_PREEMPT
2648 /*
2649  * We may have been preempted and rescheduled on a different
2650  * cpu before disabling interrupts. Need to reload the cpu area
2651  * pointer.
2652  */
2653 c = this_cpu_ptr(s->cpu_slab);
2654 #endif
2655
2656 p = ___slab_alloc(s, gfpflags, node, addr, c);
2657 local_irq_restore(flags);
2658 return p;
2659 }
2660
2661 /*
2662  * If the object has been wiped upon free, make sure it's fully
2663  * initialized by zeroing out the freelist pointer.
2664  */
2665 static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
2666 void *obj)
2667 {
2668 if (unlikely(slab_want_init_on_free(s)) && obj)
2669 memset((void *)((char *)obj + s->offset), 0, sizeof(void *));
2670 }
2671
2672 /*
2673  * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
2674  * have the fastpath folded into their functions. So no function call
2675  * overhead for requests that can be satisfied on the fastpath.
2676  *
2677  * The fastpath works by first checking if the lockless freelist can be used.
2678  * If not then __slab_alloc is called for slow processing.
2679  *
2680  * Otherwise we can simply pick the next object from the lockless free list.
2681  */
2682 static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2683 gfp_t gfpflags, int node, unsigned long addr)
2684 {
2685 void *object;
2686 struct kmem_cache_cpu *c;
2687 struct page *page;
2688 unsigned long tid;
2689
2690 s = slab_pre_alloc_hook(s, gfpflags);
2691 if (!s)
2692 return NULL;
2693 redo:
2694 /*
2695  * Must read kmem_cache cpu data via this cpu ptr. Preemption is
2696  * enabled. We may switch back and forth between cpus while
2697  * reading from one cpu area. That does not matter as long
2698  * as we end up on the original cpu again when doing the cmpxchg.
2699  *
2700  * We must guarantee that tid and kmem_cache_cpu are retrieved on
2701  * the same cpu. They could differ under CONFIG_PREEMPT, so we
2702  * check that they match and retry otherwise.
2703  */
2704 do {
2705 tid = this_cpu_read(s->cpu_slab->tid);
2706 c = raw_cpu_ptr(s->cpu_slab);
2707 } while (IS_ENABLED(CONFIG_PREEMPT) &&
2708 unlikely(tid != READ_ONCE(c->tid)));
2709
2710 /*
2711  * The irqless object alloc/free algorithm used here depends on the
2712  * sequence of fetching cpu_slab's data. tid must be fetched before
2713  * anything else, to ensure that the object and page associated with
2714  * the previous tid won't be used with the current tid. If we fetched
2715  * tid first, we could race and fail the cmpxchg; in that case we
2716  * simply retry, so there is no problem.
2717  */
2718 barrier();
2719
2720 /*
2721  * The transaction ids are globally unique per cpu and per operation on
2722  * a per cpu queue. Thus they guarantee that the cmpxchg_double
2723  * occurs on the right processor and that there was no operation on the
2724  * linked list in between.
2725  */
2726
2727 object = c->freelist;
2728 page = c->page;
2729 if (unlikely(!object || !node_match(page, node))) {
2730 object = __slab_alloc(s, gfpflags, node, addr, c);
2731 stat(s, ALLOC_SLOWPATH);
2732 } else {
2733 void *next_object = get_freepointer_safe(s, object);
2734
2735 /*
2736  * The cmpxchg will only match if there was no additional
2737  * operation and if we are on the right processor.
2738  *
2739  * The cmpxchg does the following atomically (without lock
2740  * semantics!)
2741  * 1. Relocate the first pointer to the current per cpu area.
2742  * 2. Verify that tid and freelist have not been changed.
2743  * 3. If they were not changed, replace tid and freelist.
2744  *
2745  * Since this is without lock semantics the protection is only
2746  * against code executing on this cpu *not* from access by
2747  * other cpus.
2748  */
2749 if (unlikely(!this_cpu_cmpxchg_double(
2750 s->cpu_slab->freelist, s->cpu_slab->tid,
2751 object, tid,
2752 next_object, next_tid(tid)))) {
2753
2754 note_cmpxchg_failure("slab_alloc", s, tid);
2755 goto redo;
2756 }
2757 prefetch_freepointer(s, next_object);
2758 stat(s, ALLOC_FASTPATH);
2759 }
2760
2761 maybe_wipe_obj_freeptr(s, object);
2762
2763 if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
2764 memset(object, 0, s->object_size);
2765
2766 slab_post_alloc_hook(s, gfpflags, 1, &object);
2767
2768 return object;
2769 }
2770
2771 static __always_inline void *slab_alloc(struct kmem_cache *s,
2772 gfp_t gfpflags, unsigned long addr)
2773 {
2774 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
2775 }
2776
2777 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2778 {
2779 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2780
2781 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
2782 s->size, gfpflags);
2783
2784 return ret;
2785 }
2786 EXPORT_SYMBOL(kmem_cache_alloc);
2787
2788 #ifdef CONFIG_TRACING
2789 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2790 {
2791 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2792 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2793 ret = kasan_kmalloc(s, ret, size, gfpflags);
2794 return ret;
2795 }
2796 EXPORT_SYMBOL(kmem_cache_alloc_trace);
2797 #endif
2798
2799 #ifdef CONFIG_NUMA
2800 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2801 {
2802 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2803
2804 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2805 s->object_size, s->size, gfpflags, node);
2806
2807 return ret;
2808 }
2809 EXPORT_SYMBOL(kmem_cache_alloc_node);
2810
2811 #ifdef CONFIG_TRACING
2812 void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2813 gfp_t gfpflags,
2814 int node, size_t size)
2815 {
2816 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2817
2818 trace_kmalloc_node(_RET_IP_, ret,
2819 size, s->size, gfpflags, node);
2820
2821 ret = kasan_kmalloc(s, ret, size, gfpflags);
2822 return ret;
2823 }
2824 EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2825 #endif
2826 #endif
2827
2828 /*
2829  * Slow path handling. This may still be called frequently since objects
2830  * have a longer lifetime than the cpu slabs in most processing loads.
2831  *
2832  * So we still attempt to reduce cache line usage. Just take the slab
2833  * lock and free the item. If there is no additional partial page
2834  * handling required then we can return immediately.
2835  */
2836 static void __slab_free(struct kmem_cache *s, struct page *page,
2837 void *head, void *tail, int cnt,
2838 unsigned long addr)
2839
2840 {
2841 void *prior;
2842 int was_frozen;
2843 struct page new;
2844 unsigned long counters;
2845 struct kmem_cache_node *n = NULL;
2846 unsigned long uninitialized_var(flags);
2847
2848 stat(s, FREE_SLOWPATH);
2849
2850 if (kmem_cache_debug(s) &&
2851 !free_debug_processing(s, page, head, tail, cnt, addr))
2852 return;
2853
2854 do {
2855 if (unlikely(n)) {
2856 spin_unlock_irqrestore(&n->list_lock, flags);
2857 n = NULL;
2858 }
2859 prior = page->freelist;
2860 counters = page->counters;
2861 set_freepointer(s, tail, prior);
2862 new.counters = counters;
2863 was_frozen = new.frozen;
2864 new.inuse -= cnt;
2865 if ((!new.inuse || !prior) && !was_frozen) {
2866
2867 if (kmem_cache_has_cpu_partial(s) && !prior) {
2868 /*
2869  * Slab was on no list before and will be
2870  * partially empty.
2871  * We can defer the list move and instead
2872  * freeze it.
2873  */
2874
2875 new.frozen = 1;
2876
2877 } else {
2878
2879 n = get_node(s, page_to_nid(page));
2880 /*
2881  * Speculatively acquire the list_lock.
2882  * If the cmpxchg does not succeed then we may
2883  * drop the list_lock without any processing.
2884  *
2885  * Otherwise the list_lock will synchronize with
2886  * other processors updating the list of slabs.
2887  */
2888 spin_lock_irqsave(&n->list_lock, flags);
2889
2890 }
2891 }
2892
2893 } while (!cmpxchg_double_slab(s, page,
2894 prior, counters,
2895 head, new.counters,
2896 "__slab_free"));
2897
2898 if (likely(!n)) {
2899
2900 /*
2901  * If we just froze the page then put it onto the
2902  * per cpu partial list.
2903  */
2904 if (new.frozen && !was_frozen) {
2905 put_cpu_partial(s, page, 1);
2906 stat(s, CPU_PARTIAL_FREE);
2907 }
2908 /*
2909  * The list lock was not taken, therefore no list
2910  * activity can be necessary.
2911  */
2912 if (was_frozen)
2913 stat(s, FREE_FROZEN);
2914 return;
2915 }
2916
2917 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
2918 goto slab_empty;
2919
2920 /*
2921  * Objects left in the slab. If it was not on the full list before
2922  * then add it.
2923  */
2924 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
2925 remove_full(s, n, page);
2926 add_partial(n, page, DEACTIVATE_TO_TAIL);
2927 stat(s, FREE_ADD_PARTIAL);
2928 }
2929 spin_unlock_irqrestore(&n->list_lock, flags);
2930 return;
2931
2932 slab_empty:
2933 if (prior) {
2934 /*
2935  * Slab on the partial list.
2936  */
2937 remove_partial(n, page);
2938 stat(s, FREE_REMOVE_PARTIAL);
2939 } else {
2940 /* Slab must be on the full list */
2941 remove_full(s, n, page);
2942 }
2943
2944 spin_unlock_irqrestore(&n->list_lock, flags);
2945 stat(s, FREE_SLAB);
2946 discard_slab(s, page);
2947 }
2948
2949 /*
2950  * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
2951  * can perform fastpath freeing without additional function calls.
2952  *
2953  * The fastpath is only possible if we are freeing to the current cpu slab
2954  * of this processor. This is typically the case for the same type of objects
2955  * that are allocated and freed in quick succession.
2956  *
2957  * If the fastpath is not possible then fall back to __slab_free() where we
2958  * deal with all sorts of special processing.
2959  *
2960  * Bulk free of a freelist with several objects (all pointing to the
2961  * same page) is possible by specifying a head and tail pointer, plus an
2962  * object count (cnt). Bulk free is indicated by the tail pointer being set.
2963  */
2964 static __always_inline void do_slab_free(struct kmem_cache *s,
2965 struct page *page, void *head, void *tail,
2966 int cnt, unsigned long addr)
2967 {
2968 void *tail_obj = tail ? : head;
2969 struct kmem_cache_cpu *c;
2970 unsigned long tid;
2971 redo:
2972 /*
2973  * Determine the current cpu's per cpu slab.
2974  * The cpu may change afterward. However that does not matter since
2975  * data is retrieved via this pointer. If we are on the same cpu
2976  * during the cmpxchg then the free will succeed.
2977  */
2978 do {
2979 tid = this_cpu_read(s->cpu_slab->tid);
2980 c = raw_cpu_ptr(s->cpu_slab);
2981 } while (IS_ENABLED(CONFIG_PREEMPT) &&
2982 unlikely(tid != READ_ONCE(c->tid)));
2983
2984 /* Same as the comment on barrier() in slab_alloc_node() */
2985 barrier();
2986
2987 if (likely(page == c->page)) {
2988 void **freelist = READ_ONCE(c->freelist);
2989
2990 set_freepointer(s, tail_obj, freelist);
2991
2992 if (unlikely(!this_cpu_cmpxchg_double(
2993 s->cpu_slab->freelist, s->cpu_slab->tid,
2994 freelist, tid,
2995 head, next_tid(tid)))) {
2996
2997 note_cmpxchg_failure("slab_free", s, tid);
2998 goto redo;
2999 }
3000 stat(s, FREE_FASTPATH);
3001 } else
3002 __slab_free(s, page, head, tail_obj, cnt, addr);
3003
3004 }
3005
3006 static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
3007 void *head, void *tail, int cnt,
3008 unsigned long addr)
3009 {
3010 /*
3011  * With KASAN enabled slab_free_freelist_hook() modifies the freelist
3012  * to remove objects whose reuse must be delayed.
3013  */
3014 if (slab_free_freelist_hook(s, &head, &tail))
3015 do_slab_free(s, page, head, tail, cnt, addr);
3016 }
3017
3018 #ifdef CONFIG_KASAN_GENERIC
3019 void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
3020 {
3021 do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
3022 }
3023 #endif
3024
3025 void kmem_cache_free(struct kmem_cache *s, void *x)
3026 {
3027 s = cache_from_obj(s, x);
3028 if (!s)
3029 return;
3030 slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
3031 trace_kmem_cache_free(_RET_IP_, x);
3032 }
3033 EXPORT_SYMBOL(kmem_cache_free);
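For orientation, a minimal sketch of how a client typically pairs the kmem_cache_alloc()/kmem_cache_free() paths above with a dedicated cache. The struct, cache name and flags are illustrative assumptions, not taken from this file; kmem_cache_create() itself lives in slab_common.c.

#include <linux/slab.h>

struct foo {				/* hypothetical object type */
	int id;
};

static struct kmem_cache *foo_cachep;	/* hypothetical cache */

static int __init foo_cache_init(void)
{
	foo_cachep = kmem_cache_create("foo", sizeof(struct foo),
				       0, SLAB_HWCACHE_ALIGN, NULL);
	return foo_cachep ? 0 : -ENOMEM;
}

static void foo_demo(void)
{
	/* Fast path: object taken from the per-cpu lockless freelist. */
	struct foo *f = kmem_cache_alloc(foo_cachep, GFP_KERNEL);

	if (!f)
		return;
	f->id = 1;
	/* Returned to the same cache; ends up in do_slab_free() or __slab_free(). */
	kmem_cache_free(foo_cachep, f);
}
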
3034
3035 struct detached_freelist {
3036 struct page *page;
3037 void *tail;
3038 void *freelist;
3039 int cnt;
3040 struct kmem_cache *s;
3041 };
3042
3043 /*
3044  * This function progressively scans the array with free objects (with
3045  * a limited look ahead) and extracts objects belonging to the same
3046  * page. It builds a detached freelist directly within the given
3047  * page/objects. This can happen without any need for synchronization,
3048  * because the objects are owned by the running process.
3049  * The freelist is built up as a singly linked list in the objects.
3050  * The idea is that this detached freelist can then be bulk
3051  * transferred to the real freelist(s), but only requiring a single
3052  * synchronization primitive. Look ahead in the array is limited due
3053  * to performance reasons.
3054  */
3055 static inline
3056 int build_detached_freelist(struct kmem_cache *s, size_t size,
3057 void **p, struct detached_freelist *df)
3058 {
3059 size_t first_skipped_index = 0;
3060 int lookahead = 3;
3061 void *object;
3062 struct page *page;
3063
3064
3065 df->page = NULL;
3066
3067 do {
3068 object = p[--size];
3069
3070 } while (!object && size);
3071
3072 if (!object)
3073 return 0;
3074
3075 page = virt_to_head_page(object);
3076 if (!s) {
3077
3078 if (unlikely(!PageSlab(page))) {
3079 BUG_ON(!PageCompound(page));
3080 kfree_hook(object);
3081 __free_pages(page, compound_order(page));
3082 p[size] = NULL;
3083 return size;
3084 }
3085
3086 df->s = page->slab_cache;
3087 } else {
3088 df->s = cache_from_obj(s, object);
3089 }
3090
3091
3092 df->page = page;
3093 set_freepointer(df->s, object, NULL);
3094 df->tail = object;
3095 df->freelist = object;
3096 p[size] = NULL;
3097 df->cnt = 1;
3098
3099 while (size) {
3100 object = p[--size];
3101 if (!object)
3102 continue;
3103
3104
3105 if (df->page == virt_to_head_page(object)) {
3106
3107 set_freepointer(df->s, object, df->freelist);
3108 df->freelist = object;
3109 df->cnt++;
3110 p[size] = NULL;
3111
3112 continue;
3113 }
3114
3115
3116 if (!--lookahead)
3117 break;
3118
3119 if (!first_skipped_index)
3120 first_skipped_index = size + 1;
3121 }
3122
3123 return first_skipped_index;
3124 }
3125
3126 /* Note that interrupts must be enabled when calling this function. */
3127 void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
3128 {
3129 if (WARN_ON(!size))
3130 return;
3131
3132 do {
3133 struct detached_freelist df;
3134
3135 size = build_detached_freelist(s, size, p, &df);
3136 if (!df.page)
3137 continue;
3138
3139 slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
3140 } while (likely(size));
3141 }
3142 EXPORT_SYMBOL(kmem_cache_free_bulk);
3143
3144 /* Note that interrupts must be enabled when calling this function. */
3145 int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3146 void **p)
3147 {
3148 struct kmem_cache_cpu *c;
3149 int i;
3150
3151 /* memcg and kmem_cache debug support */
3152 s = slab_pre_alloc_hook(s, flags);
3153 if (unlikely(!s))
3154 return false;
3155
3156 /*
3157  * Drain objects in the per cpu slab, while disabling local IRQs, which
3158  * protects against PREEMPT and interrupt handlers invoking the normal fastpath.
3159  */
3160 local_irq_disable();
3161 c = this_cpu_ptr(s->cpu_slab);
3162
3163 for (i = 0; i < size; i++) {
3164 void *object = c->freelist;
3165
3166 if (unlikely(!object)) {
3167 /*
3168  * We may have removed an object from c->freelist using
3169  * the fastpath in the previous iteration; in that case,
3170  * c->tid has not been bumped yet.
3171  * Since ___slab_alloc() may reenable interrupts while
3172  * allocating memory, we should bump c->tid now.
3173  */
3174 c->tid = next_tid(c->tid);
3175
3176 /*
3177  * Invoking the slow path likely has the side effect
3178  * of re-populating the per CPU c->freelist.
3179  */
3180 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3181 _RET_IP_, c);
3182 if (unlikely(!p[i]))
3183 goto error;
3184
3185 c = this_cpu_ptr(s->cpu_slab);
3186 maybe_wipe_obj_freeptr(s, p[i]);
3187
3188 continue;
3189 }
3190 c->freelist = get_freepointer(s, object);
3191 p[i] = object;
3192 maybe_wipe_obj_freeptr(s, p[i]);
3193 }
3194 c->tid = next_tid(c->tid);
3195 local_irq_enable();
3196
3197 /* Clear memory outside the IRQ-disabled fastpath loop */
3198 if (unlikely(slab_want_init_on_alloc(flags, s))) {
3199 int j;
3200
3201 for (j = 0; j < i; j++)
3202 memset(p[j], 0, s->object_size);
3203 }
3204
3205 /* memcg and kmem_cache debug support */
3206 slab_post_alloc_hook(s, flags, size, p);
3207 return i;
3208 error:
3209 local_irq_enable();
3210 slab_post_alloc_hook(s, flags, i, p);
3211 __kmem_cache_free_bulk(s, i, p);
3212 return 0;
3213 }
3214 EXPORT_SYMBOL(kmem_cache_alloc_bulk);
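A hedged usage sketch of the bulk interface exported above; the cache pointer and the batch size of 16 are assumptions chosen for illustration.

static void foo_bulk_demo(struct kmem_cache *cachep)
{
	void *objs[16];
	int got;

	/* Returns the number of objects placed in objs[], or 0 on failure. */
	got = kmem_cache_alloc_bulk(cachep, GFP_KERNEL, ARRAY_SIZE(objs), objs);
	if (!got)
		return;

	/* ... use objs[0] .. objs[got - 1] ... */

	/* Objects are grouped into per-page detached freelists internally. */
	kmem_cache_free_bulk(cachep, got, objs);
}
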
3215
3216
3217 /*
3218  * Object placement in a slab is made very easy because we always start at
3219  * offset 0. If we tune the size of the object to the alignment then we can
3220  * get the required alignment by putting one properly sized object after
3221  * another.
3222  *
3223  * Notice that the allocation order determines the sizes of the per cpu
3224  * caches. Each processor has always one slab available for allocations.
3225  * Increasing the allocation order reduces the number of times that slabs
3226  * must be moved on and off the partial lists and is therefore a factor in
3227  * locking overhead.
3228  *
3229  * Minimum / maximum order of slab pages. This influences locking overhead
3230  * and slab fragmentation. A higher order reduces the number of partial
3231  * slabs and increases the number of allocations possible without having
3232  * to take the list_lock.
3233  */
3234
3235
3236 static unsigned int slub_min_order;
3237 static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3238 static unsigned int slub_min_objects;
3239
3240
3241 /*
3242  * Calculate the order of allocation given a slab object size.
3243  *
3244  * The order of allocation has significant impact on performance and other
3245  * system components. Generally order 0 allocations should be preferred
3246  * since order 0 does not cause fragmentation in the page allocator. Larger
3247  * objects are problematic to put into order 0 slabs because there may be
3248  * too much unused space left. We go to a higher order if more than 1/16th
3249  * of the slab would be wasted.
3250  *
3251  * In order to reach satisfactory performance we must ensure that a minimum
3252  * number of objects is in one slab. Otherwise we may generate too much
3253  * activity on the partial lists which requires taking the list_lock. This
3254  * is less a concern for large slabs though which are rarely used.
3255  *
3256  * slub_max_order specifies the order where we begin to stop considering the
3257  * number of objects in a slab as critical. If we reach slub_max_order then
3258  * we try to keep the page order as low as possible, accepting more wasted
3259  * space in favor of a small page order.
3260  *
3261  * Higher order allocations also allow the placement of more objects in a
3262  * slab and thereby reduce object handling overhead. If the user requested
3263  * a higher minimum order then we start from that order instead.
3264  */
3265 static inline unsigned int slab_order(unsigned int size,
3266 unsigned int min_objects, unsigned int max_order,
3267 unsigned int fract_leftover)
3268 {
3269 unsigned int min_order = slub_min_order;
3270 unsigned int order;
3271
3272 if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
3273 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3274
3275 for (order = max(min_order, (unsigned int)get_order(min_objects * size));
3276 order <= max_order; order++) {
3277
3278 unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
3279 unsigned int rem;
3280
3281 rem = slab_size % size;
3282
3283 if (rem <= slab_size / fract_leftover)
3284 break;
3285 }
3286
3287 return order;
3288 }
3289
3290 static inline int calculate_order(unsigned int size)
3291 {
3292 unsigned int order;
3293 unsigned int min_objects;
3294 unsigned int max_objects;
3295
3296 /*
3297  * Attempt to find the best configuration for a slab. This
3298  * works by first attempting to generate a layout with
3299  * the best configuration and backing off gradually.
3300  *
3301  * First we increase the acceptable waste in a slab. Then
3302  * we reduce the minimum objects required in a slab.
3303  */
3304 min_objects = slub_min_objects;
3305 if (!min_objects)
3306 min_objects = 4 * (fls(nr_cpu_ids) + 1);
3307 max_objects = order_objects(slub_max_order, size);
3308 min_objects = min(min_objects, max_objects);
3309
3310 while (min_objects > 1) {
3311 unsigned int fraction;
3312
3313 fraction = 16;
3314 while (fraction >= 4) {
3315 order = slab_order(size, min_objects,
3316 slub_max_order, fraction);
3317 if (order <= slub_max_order)
3318 return order;
3319 fraction /= 2;
3320 }
3321 min_objects--;
3322 }
3323
3324 /*
3325  * We were unable to place multiple objects in a slab. Now
3326  * let's see if we can place a single object there.
3327  */
3328 order = slab_order(size, 1, slub_max_order, 1);
3329 if (order <= slub_max_order)
3330 return order;
3331
3332 /*
3333  * This slab cannot be placed using slub_max_order; try the absolute maximum.
3334  */
3335 order = slab_order(size, 1, MAX_ORDER, 1);
3336 if (order < MAX_ORDER)
3337 return order;
3338 return -ENOSYS;
3339 }
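To make the order heuristic above concrete, here is a worked example; the numbers (page size, CPU count, object size) are assumptions, not values taken from this file.

/*
 * Worked example (illustrative): PAGE_SIZE = 4096, slub_max_order = 3,
 * 8 possible CPUs, object size = 700 bytes, slub_min_objects unset.
 *
 *   min_objects = 4 * (fls(8) + 1) = 20
 *   max_objects = order_objects(3, 700) = 32768 / 700 = 46, so keep 20
 *
 *   slab_order(700, 20, 3, 16):
 *     starting order = get_order(20 * 700 = 14000) = 2   (16 KiB slab)
 *     order 2: 16384 / 700 = 23 objects, remainder = 16384 - 23*700 = 284
 *              284 <= 16384 / 16 = 1024, so the waste is acceptable
 *
 *   calculate_order() therefore returns 2: each slab for this cache is a
 *   4-page compound page holding 23 objects.
 */
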
3340
3341 static void
3342 init_kmem_cache_node(struct kmem_cache_node *n)
3343 {
3344 n->nr_partial = 0;
3345 spin_lock_init(&n->list_lock);
3346 INIT_LIST_HEAD(&n->partial);
3347 #ifdef CONFIG_SLUB_DEBUG
3348 atomic_long_set(&n->nr_slabs, 0);
3349 atomic_long_set(&n->total_objects, 0);
3350 INIT_LIST_HEAD(&n->full);
3351 #endif
3352 }
3353
3354 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3355 {
3356 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3357 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3358
3359 /*
3360  * Must align to double word boundary for the double cmpxchg
3361  * instructions to work.
3362  */
3363 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3364 2 * sizeof(void *));
3365
3366 if (!s->cpu_slab)
3367 return 0;
3368
3369 init_kmem_cache_cpus(s);
3370
3371 return 1;
3372 }
3373
3374 static struct kmem_cache *kmem_cache_node;
3375
3376 /*
3377  * No kmalloc_node yet so do it by hand. We know that this is the first
3378  * slab on the node for this slabcache. There are no concurrent accesses
3379  * possible.
3380  *
3381  * Note that this function only works on the kmem_cache_node
3382  * when allocating for the kmem_cache_node cache. This is used for
3383  * bootstrapping memory on a fresh node that has no slab structures yet.
3384  */
3385 static void early_kmem_cache_node_alloc(int node)
3386 {
3387 struct page *page;
3388 struct kmem_cache_node *n;
3389
3390 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3391
3392 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3393
3394 BUG_ON(!page);
3395 if (page_to_nid(page) != node) {
3396 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3397 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
3398 }
3399
3400 n = page->freelist;
3401 BUG_ON(!n);
3402 #ifdef CONFIG_SLUB_DEBUG
3403 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3404 init_tracking(kmem_cache_node, n);
3405 #endif
3406 n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
3407 GFP_KERNEL);
3408 page->freelist = get_freepointer(kmem_cache_node, n);
3409 page->inuse = 1;
3410 page->frozen = 0;
3411 kmem_cache_node->node[node] = n;
3412 init_kmem_cache_node(n);
3413 inc_slabs_node(kmem_cache_node, node, page->objects);
3414
3415 /*
3416  * No locks need to be taken here as the node structure has just been
3417  * initialized and there is no concurrent access.
3418  */
3419 __add_partial(n, page, DEACTIVATE_TO_HEAD);
3420 }
3421
3422 static void free_kmem_cache_nodes(struct kmem_cache *s)
3423 {
3424 int node;
3425 struct kmem_cache_node *n;
3426
3427 for_each_kmem_cache_node(s, node, n) {
3428 s->node[node] = NULL;
3429 kmem_cache_free(kmem_cache_node, n);
3430 }
3431 }
3432
3433 void __kmem_cache_release(struct kmem_cache *s)
3434 {
3435 cache_random_seq_destroy(s);
3436 free_percpu(s->cpu_slab);
3437 free_kmem_cache_nodes(s);
3438 }
3439
3440 static int init_kmem_cache_nodes(struct kmem_cache *s)
3441 {
3442 int node;
3443
3444 for_each_node_state(node, N_NORMAL_MEMORY) {
3445 struct kmem_cache_node *n;
3446
3447 if (slab_state == DOWN) {
3448 early_kmem_cache_node_alloc(node);
3449 continue;
3450 }
3451 n = kmem_cache_alloc_node(kmem_cache_node,
3452 GFP_KERNEL, node);
3453
3454 if (!n) {
3455 free_kmem_cache_nodes(s);
3456 return 0;
3457 }
3458
3459 init_kmem_cache_node(n);
3460 s->node[node] = n;
3461 }
3462 return 1;
3463 }
3464
3465 static void set_min_partial(struct kmem_cache *s, unsigned long min)
3466 {
3467 if (min < MIN_PARTIAL)
3468 min = MIN_PARTIAL;
3469 else if (min > MAX_PARTIAL)
3470 min = MAX_PARTIAL;
3471 s->min_partial = min;
3472 }
3473
3474 static void set_cpu_partial(struct kmem_cache *s)
3475 {
3476 #ifdef CONFIG_SLUB_CPU_PARTIAL
3477
3478 /*
3479  * cpu_partial determines the maximum number of objects kept in the
3480  * per cpu partial lists of a processor.
3481  *
3482  * Per cpu partial lists mainly contain slabs that just have one
3483  * object freed. If they are used for allocation then they can be
3484  * completely filled up again with minimal effort. The slab will
3485  * never hit the per node partial lists.
3486  *
3487  * A kmem_cache_shrink() will take the per cpu partial lists and move
3488  * them to the per node partial list for freeing or reuse.
3489  *
3490  * The thresholds below were chosen so that caches with larger objects
3491  * keep fewer pages on the per cpu partial lists, since those pages
3492  * hold fewer objects and pin more memory.
3493  */
3494 if (!kmem_cache_has_cpu_partial(s))
3495 s->cpu_partial = 0;
3496 else if (s->size >= PAGE_SIZE)
3497 s->cpu_partial = 2;
3498 else if (s->size >= 1024)
3499 s->cpu_partial = 6;
3500 else if (s->size >= 256)
3501 s->cpu_partial = 13;
3502 else
3503 s->cpu_partial = 30;
3504 #endif
3505 }
3506
3507
3508 /*
3509  * calculate_sizes() determines the order and the distribution of data
3510  * within a slab object.
3511  */
3511 static int calculate_sizes(struct kmem_cache *s, int forced_order)
3512 {
3513 slab_flags_t flags = s->flags;
3514 unsigned int size = s->object_size;
3515 unsigned int order;
3516
3517 /*
3518  * Round up object size to the next word boundary. We can only
3519  * place the free pointer at word boundaries and this determines
3520  * the possible location of the free pointer.
3521  */
3522 size = ALIGN(size, sizeof(void *));
3523
3524 #ifdef CONFIG_SLUB_DEBUG
3525 /*
3526  * Determine if we can poison the object itself. If the user of
3527  * the slab may touch the object after free or before allocation
3528  * then we should not poison the object itself.
3529  */
3530 if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
3531 !s->ctor)
3532 s->flags |= __OBJECT_POISON;
3533 else
3534 s->flags &= ~__OBJECT_POISON;
3535
3536
3537 /*
3538  * If we are Redzoning then check if there is some space between the
3539  * end of the object and the free pointer. If not then add an
3540  * additional word to have some bytes to store Redzone information.
3541  */
3542 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3543 size += sizeof(void *);
3544 #endif
3545
3546 /*
3547  * With that we have determined the number of bytes in actual use
3548  * by the object. This is the potential offset to the free pointer.
3549  */
3550 s->inuse = size;
3551
3552 if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3553 s->ctor)) {
3554 /*
3555  * Relocate the free pointer after the object if it is not
3556  * permitted to overwrite the first word of the object on
3557  * kmem_cache_free.
3558  *
3559  * This is the case if we do RCU, have a constructor or
3560  * are poisoning the objects.
3561  */
3562 s->offset = size;
3563 size += sizeof(void *);
3564 }
3565
3566 #ifdef CONFIG_SLUB_DEBUG
3567 if (flags & SLAB_STORE_USER)
3568 /*
3569  * Need to store information about allocs and frees after
3570  * the object.
3571  */
3572 size += 2 * sizeof(struct track);
3573 #endif
3574
3575 kasan_cache_create(s, &size, &s->flags);
3576 #ifdef CONFIG_SLUB_DEBUG
3577 if (flags & SLAB_RED_ZONE) {
3578 /*
3579  * Add some empty padding so that we can catch
3580  * overwrites from earlier objects rather than let
3581  * tracking information or the free pointer be
3582  * corrupted if a user writes before the start of the
3583  * object.
3584  */
3585 size += sizeof(void *);
3586
3587 s->red_left_pad = sizeof(void *);
3588 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
3589 size += s->red_left_pad;
3590 }
3591 #endif
3592
3593 /*
3594  * SLUB stores one object immediately after another beginning from
3595  * offset 0. In order to align the objects we have to simply size
3596  * each object to conform to the alignment.
3597  */
3598 size = ALIGN(size, s->align);
3599 s->size = size;
3600 if (forced_order >= 0)
3601 order = forced_order;
3602 else
3603 order = calculate_order(size);
3604
3605 if ((int)order < 0)
3606 return 0;
3607
3608 s->allocflags = 0;
3609 if (order)
3610 s->allocflags |= __GFP_COMP;
3611
3612 if (s->flags & SLAB_CACHE_DMA)
3613 s->allocflags |= GFP_DMA;
3614
3615 if (s->flags & SLAB_CACHE_DMA32)
3616 s->allocflags |= GFP_DMA32;
3617
3618 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3619 s->allocflags |= __GFP_RECLAIMABLE;
3620
3621 /*
3622  * Determine the number of objects per slab.
3623  */
3624 s->oo = oo_make(order, size);
3625 s->min = oo_make(get_order(size), size);
3626 if (oo_objects(s->oo) > oo_objects(s->max))
3627 s->max = s->oo;
3628
3629 return !!oo_objects(s->oo);
3630 }
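A worked pass through the sizing logic above, under assumed parameters (64-bit build, 8-byte alignment, no debug flags); the numbers are illustrative only.

/*
 * Example: object_size = 24, align = 8, flags = 0, but a constructor is set.
 *
 *   size  = ALIGN(24, 8)        = 24
 *   inuse = 24
 *   a ctor is present, so the free pointer must not overlay the object:
 *       offset = 24, size += sizeof(void *)  ->  32
 *   size  = ALIGN(32, 8)        = 32
 *
 * Each object occupies a 32-byte stride: 24 bytes of payload followed by
 * the 8-byte freelist pointer. Without the constructor (and without RCU or
 * poisoning) the stride would stay at 24 and the free pointer would reuse
 * offset 0 of a free object.
 */
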
3631
3632 static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
3633 {
3634 s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
3635 #ifdef CONFIG_SLAB_FREELIST_HARDENED
3636 s->random = get_random_long();
3637 #endif
3638
3639 if (!calculate_sizes(s, -1))
3640 goto error;
3641 if (disable_higher_order_debug) {
3642 /*
3643  * Disable debugging flags that store metadata if the min slab
3644  * order increased.
3645  */
3646 if (get_order(s->size) > get_order(s->object_size)) {
3647 s->flags &= ~DEBUG_METADATA_FLAGS;
3648 s->offset = 0;
3649 if (!calculate_sizes(s, -1))
3650 goto error;
3651 }
3652 }
3653
3654 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3655 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3656 if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
3657 /* Enable fast mode */
3658 s->flags |= __CMPXCHG_DOUBLE;
3659 #endif
3660
3661 /*
3662  * The larger the object size is, the more pages we want on the partial
3663  * list to avoid pounding the page allocator excessively.
3664  */
3665 set_min_partial(s, ilog2(s->size) / 2);
3666
3667 set_cpu_partial(s);
3668
3669 #ifdef CONFIG_NUMA
3670 s->remote_node_defrag_ratio = 1000;
3671 #endif
3672
3673 /* Initialize the pre-computed randomized freelist if slab is up */
3674 if (slab_state >= UP) {
3675 if (init_cache_random_seq(s))
3676 goto error;
3677 }
3678
3679 if (!init_kmem_cache_nodes(s))
3680 goto error;
3681
3682 if (alloc_kmem_cache_cpus(s))
3683 return 0;
3684
3685 free_kmem_cache_nodes(s);
3686 error:
3687 return -EINVAL;
3688 }
3689
3690 static void list_slab_objects(struct kmem_cache *s, struct page *page,
3691 const char *text)
3692 {
3693 #ifdef CONFIG_SLUB_DEBUG
3694 void *addr = page_address(page);
3695 void *p;
3696 unsigned long *map = bitmap_zalloc(page->objects, GFP_ATOMIC);
3697 if (!map)
3698 return;
3699 slab_err(s, page, text, s->name);
3700 slab_lock(page);
3701
3702 get_map(s, page, map);
3703 for_each_object(p, s, addr, page->objects) {
3704
3705 if (!test_bit(slab_index(p, s, addr), map)) {
3706 pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
3707 print_tracking(s, p);
3708 }
3709 }
3710 slab_unlock(page);
3711 bitmap_free(map);
3712 #endif
3713 }
3714
3715 /*
3716  * Attempt to free all partial slabs on a node.
3717  * This is called from __kmem_cache_shutdown(). We must take list_lock
3718  * because sysfs files may still access the partial list after shutdown.
3719  */
3720 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3721 {
3722 LIST_HEAD(discard);
3723 struct page *page, *h;
3724
3725 BUG_ON(irqs_disabled());
3726 spin_lock_irq(&n->list_lock);
3727 list_for_each_entry_safe(page, h, &n->partial, slab_list) {
3728 if (!page->inuse) {
3729 remove_partial(n, page);
3730 list_add(&page->slab_list, &discard);
3731 } else {
3732 list_slab_objects(s, page,
3733 "Objects remaining in %s on __kmem_cache_shutdown()");
3734 }
3735 }
3736 spin_unlock_irq(&n->list_lock);
3737
3738 list_for_each_entry_safe(page, h, &discard, slab_list)
3739 discard_slab(s, page);
3740 }
3741
3742 bool __kmem_cache_empty(struct kmem_cache *s)
3743 {
3744 int node;
3745 struct kmem_cache_node *n;
3746
3747 for_each_kmem_cache_node(s, node, n)
3748 if (n->nr_partial || slabs_node(s, node))
3749 return false;
3750 return true;
3751 }
3752
3753 /*
3754  * Release all resources used by a slab cache.
3755  */
3756 int __kmem_cache_shutdown(struct kmem_cache *s)
3757 {
3758 int node;
3759 struct kmem_cache_node *n;
3760
3761 flush_all(s);
3762
3763 for_each_kmem_cache_node(s, node, n) {
3764 free_partial(s, n);
3765 if (n->nr_partial || slabs_node(s, node))
3766 return 1;
3767 }
3768 sysfs_slab_remove(s);
3769 return 0;
3770 }
3771
3772
3773 /*
3774  * Boot command line parameters for tuning slab page orders and object counts.
3775  */
3776 static int __init setup_slub_min_order(char *str)
3777 {
3778 get_option(&str, (int *)&slub_min_order);
3779
3780 return 1;
3781 }
3782
3783 __setup("slub_min_order=", setup_slub_min_order);
3784
3785 static int __init setup_slub_max_order(char *str)
3786 {
3787 get_option(&str, (int *)&slub_max_order);
3788 slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
3789
3790 return 1;
3791 }
3792
3793 __setup("slub_max_order=", setup_slub_max_order);
3794
3795 static int __init setup_slub_min_objects(char *str)
3796 {
3797 get_option(&str, (int *)&slub_min_objects);
3798
3799 return 1;
3800 }
3801
3802 __setup("slub_min_objects=", setup_slub_min_objects);
3803
3804 void *__kmalloc(size_t size, gfp_t flags)
3805 {
3806 struct kmem_cache *s;
3807 void *ret;
3808
3809 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3810 return kmalloc_large(size, flags);
3811
3812 s = kmalloc_slab(size, flags);
3813
3814 if (unlikely(ZERO_OR_NULL_PTR(s)))
3815 return s;
3816
3817 ret = slab_alloc(s, flags, _RET_IP_);
3818
3819 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3820
3821 ret = kasan_kmalloc(s, ret, size, flags);
3822
3823 return ret;
3824 }
3825 EXPORT_SYMBOL(__kmalloc);
3826
3827 #ifdef CONFIG_NUMA
3828 static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3829 {
3830 struct page *page;
3831 void *ptr = NULL;
3832 unsigned int order = get_order(size);
3833
3834 flags |= __GFP_COMP;
3835 page = alloc_pages_node(node, flags, order);
3836 if (page) {
3837 ptr = page_address(page);
3838 mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
3839 1 << order);
3840 }
3841
3842 return kmalloc_large_node_hook(ptr, size, flags);
3843 }
3844
3845 void *__kmalloc_node(size_t size, gfp_t flags, int node)
3846 {
3847 struct kmem_cache *s;
3848 void *ret;
3849
3850 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3851 ret = kmalloc_large_node(size, flags, node);
3852
3853 trace_kmalloc_node(_RET_IP_, ret,
3854 size, PAGE_SIZE << get_order(size),
3855 flags, node);
3856
3857 return ret;
3858 }
3859
3860 s = kmalloc_slab(size, flags);
3861
3862 if (unlikely(ZERO_OR_NULL_PTR(s)))
3863 return s;
3864
3865 ret = slab_alloc_node(s, flags, node, _RET_IP_);
3866
3867 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3868
3869 ret = kasan_kmalloc(s, ret, size, flags);
3870
3871 return ret;
3872 }
3873 EXPORT_SYMBOL(__kmalloc_node);
3874 #endif
3875
3876 #ifdef CONFIG_HARDENED_USERCOPY
3877 /*
3878  * Rejects incorrectly sized objects and objects that are to be copied
3879  * to/from userspace but do not fall entirely within the containing slab
3880  * cache's usercopy region.
3881  *
3882  * Aborts the copy (via usercopy_abort()) rather than returning an error.
3883  */
3884
3885 void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
3886 bool to_user)
3887 {
3888 struct kmem_cache *s;
3889 unsigned int offset;
3890 size_t object_size;
3891
3892 ptr = kasan_reset_tag(ptr);
3893
3894
3895 s = page->slab_cache;
3896
3897
3898 if (ptr < page_address(page))
3899 usercopy_abort("SLUB object not in SLUB page?!", NULL,
3900 to_user, 0, n);
3901
3902
3903 offset = (ptr - page_address(page)) % s->size;
3904
3905
3906 if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
3907 if (offset < s->red_left_pad)
3908 usercopy_abort("SLUB object in left red zone",
3909 s->name, to_user, offset, n);
3910 offset -= s->red_left_pad;
3911 }
3912
3913
3914 if (offset >= s->useroffset &&
3915 offset - s->useroffset <= s->usersize &&
3916 n <= s->useroffset - offset + s->usersize)
3917 return;
3918
3919 /*
3920  * If the copy is still within the allocated object, produce
3921  * a warning instead of rejecting the copy. This is intended
3922  * to be a temporary method to find any missing usercopy
3923  * whitelists.
3924  */
3925 object_size = slab_ksize(s);
3926 if (usercopy_fallback &&
3927 offset <= object_size && n <= object_size - offset) {
3928 usercopy_warn("SLUB object", s->name, to_user, offset, n);
3929 return;
3930 }
3931
3932 usercopy_abort("SLUB object", s->name, to_user, offset, n);
3933 }
3934 #endif
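For context, a sketch of how a cache owner whitelists a usercopy region so that the hardened check above accepts copies into that sub-range. kmem_cache_create_usercopy() is the real API; the struct and names here are hypothetical.

#include <linux/slab.h>
#include <linux/spinlock.h>

struct blob {				/* hypothetical */
	spinlock_t lock;		/* must never reach user space */
	char payload[128];		/* the only user-accessible part */
};

static struct kmem_cache *blob_cachep;

static int __init blob_cache_init(void)
{
	blob_cachep = kmem_cache_create_usercopy("blob", sizeof(struct blob),
				0, SLAB_HWCACHE_ALIGN,
				offsetof(struct blob, payload),		/* useroffset */
				sizeof(((struct blob *)0)->payload),	/* usersize */
				NULL);
	return blob_cachep ? 0 : -ENOMEM;
}
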
3935
3936 size_t __ksize(const void *object)
3937 {
3938 struct page *page;
3939
3940 if (unlikely(object == ZERO_SIZE_PTR))
3941 return 0;
3942
3943 page = virt_to_head_page(object);
3944
3945 if (unlikely(!PageSlab(page))) {
3946 WARN_ON(!PageCompound(page));
3947 return page_size(page);
3948 }
3949
3950 return slab_ksize(page->slab_cache);
3951 }
3952 EXPORT_SYMBOL(__ksize);
3953
3954 void kfree(const void *x)
3955 {
3956 struct page *page;
3957 void *object = (void *)x;
3958
3959 trace_kfree(_RET_IP_, x);
3960
3961 if (unlikely(ZERO_OR_NULL_PTR(x)))
3962 return;
3963
3964 page = virt_to_head_page(x);
3965 if (unlikely(!PageSlab(page))) {
3966 unsigned int order = compound_order(page);
3967
3968 BUG_ON(!PageCompound(page));
3969 kfree_hook(object);
3970 mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
3971 -(1 << order));
3972 __free_pages(page, order);
3973 return;
3974 }
3975 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
3976 }
3977 EXPORT_SYMBOL(kfree);
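A small sketch of the two kfree() paths above from a caller's point of view; the sizes are arbitrary assumptions.

static void kmalloc_demo(void)
{
	void *small = kmalloc(64, GFP_KERNEL);		   /* served by a kmalloc slab cache */
	void *large = kmalloc(16 * PAGE_SIZE, GFP_KERNEL); /* > KMALLOC_MAX_CACHE_SIZE, so it
							    * comes straight from the page allocator */

	if (small)
		pr_info("usable size %zu\n", ksize(small));	/* >= 64 */

	kfree(small);	/* PageSlab: goes through slab_free() */
	kfree(large);	/* compound page: freed via __free_pages() */
}
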
3978
3979 #define SHRINK_PROMOTE_MAX 32
3980
3981 /*
3982  * kmem_cache_shrink discards empty slabs and promotes the slabs filled
3983  * up most to the head of the partial lists. New allocations will then
3984  * fill those up and thus they can be removed from the partial lists.
3985  *
3986  * The slabs with the least items are placed last. This results in them
3987  * being allocated from last, increasing the chance that the last objects
3988  * are freed in them.
3989  */
3990 int __kmem_cache_shrink(struct kmem_cache *s)
3991 {
3992 int node;
3993 int i;
3994 struct kmem_cache_node *n;
3995 struct page *page;
3996 struct page *t;
3997 struct list_head discard;
3998 struct list_head promote[SHRINK_PROMOTE_MAX];
3999 unsigned long flags;
4000 int ret = 0;
4001
4002 flush_all(s);
4003 for_each_kmem_cache_node(s, node, n) {
4004 INIT_LIST_HEAD(&discard);
4005 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
4006 INIT_LIST_HEAD(promote + i);
4007
4008 spin_lock_irqsave(&n->list_lock, flags);
4009
4010 /*
4011  * Build lists of slabs to discard or promote.
4012  *
4013  * Note that concurrent frees may occur while we hold the
4014  * list_lock. page->inuse here is the upper limit.
4015  */
4016 list_for_each_entry_safe(page, t, &n->partial, slab_list) {
4017 int free = page->objects - page->inuse;
4018
4019
4020 barrier();
4021
4022
4023 BUG_ON(free <= 0);
4024
4025 if (free == page->objects) {
4026 list_move(&page->slab_list, &discard);
4027 n->nr_partial--;
4028 } else if (free <= SHRINK_PROMOTE_MAX)
4029 list_move(&page->slab_list, promote + free - 1);
4030 }
4031
4032 /*
4033  * Promote the slabs filled up most to the head of the
4034  * partial list.
4035  */
4036 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
4037 list_splice(promote + i, &n->partial);
4038
4039 spin_unlock_irqrestore(&n->list_lock, flags);
4040
4041 /* Release empty slabs */
4042 list_for_each_entry_safe(page, t, &discard, slab_list)
4043 discard_slab(s, page);
4044
4045 if (slabs_node(s, node))
4046 ret = 1;
4047 }
4048
4049 return ret;
4050 }
4051
4052 #ifdef CONFIG_MEMCG
4053 void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
4054 {
4055 /*
4056  * Called with all the locks held after a sched RCU grace period.
4057  * Even if @s becomes empty after shrinking, we can't know that @s
4058  * doesn't have allocations already in flight and thus can't
4059  * destroy @s until the associated memcg is released.
4060  *
4061  * However, remove the sysfs files for empty caches here.
4062  * Each cache has a lot of interface files which aren't
4063  * particularly useful for empty draining caches; otherwise, we can
4064  * easily accumulate a huge number of unused sysfs files for
4065  * dying memcgs.
4066  */
4067 if (!__kmem_cache_shrink(s))
4068 sysfs_slab_remove(s);
4069 }
4070
4071 void __kmemcg_cache_deactivate(struct kmem_cache *s)
4072 {
4073 /*
4074  * Disable empty slabs caching. Used to avoid pinning offline
4075  * memory cgroups by kmem pages that can be freed.
4076  */
4077 slub_set_cpu_partial(s, 0);
4078 s->min_partial = 0;
4079 }
4080 #endif
4081
4082 static int slab_mem_going_offline_callback(void *arg)
4083 {
4084 struct kmem_cache *s;
4085
4086 mutex_lock(&slab_mutex);
4087 list_for_each_entry(s, &slab_caches, list)
4088 __kmem_cache_shrink(s);
4089 mutex_unlock(&slab_mutex);
4090
4091 return 0;
4092 }
4093
4094 static void slab_mem_offline_callback(void *arg)
4095 {
4096 struct kmem_cache_node *n;
4097 struct kmem_cache *s;
4098 struct memory_notify *marg = arg;
4099 int offline_node;
4100
4101 offline_node = marg->status_change_nid_normal;
4102
4103
4104
4105
4106
4107 if (offline_node < 0)
4108 return;
4109
4110 mutex_lock(&slab_mutex);
4111 list_for_each_entry(s, &slab_caches, list) {
4112 n = get_node(s, offline_node);
4113 if (n) {
4114 /*
4115  * If n->nr_slabs > 0, slabs still exist on the node
4116  * that is going down. We were unable to free them,
4117  * and offline_pages() shouldn't call this callback in
4118  * that case. So, we must fail.
4119  */
4120 BUG_ON(slabs_node(s, offline_node));
4121
4122 s->node[offline_node] = NULL;
4123 kmem_cache_free(kmem_cache_node, n);
4124 }
4125 }
4126 mutex_unlock(&slab_mutex);
4127 }
4128
4129 static int slab_mem_going_online_callback(void *arg)
4130 {
4131 struct kmem_cache_node *n;
4132 struct kmem_cache *s;
4133 struct memory_notify *marg = arg;
4134 int nid = marg->status_change_nid_normal;
4135 int ret = 0;
4136
4137
4138
4139
4140
4141 if (nid < 0)
4142 return 0;
4143
4144
4145
4146
4147
4148
4149 mutex_lock(&slab_mutex);
4150 list_for_each_entry(s, &slab_caches, list) {
4151
4152
4153
4154
4155
4156 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4157 if (!n) {
4158 ret = -ENOMEM;
4159 goto out;
4160 }
4161 init_kmem_cache_node(n);
4162 s->node[nid] = n;
4163 }
4164 out:
4165 mutex_unlock(&slab_mutex);
4166 return ret;
4167 }
4168
4169 static int slab_memory_callback(struct notifier_block *self,
4170 unsigned long action, void *arg)
4171 {
4172 int ret = 0;
4173
4174 switch (action) {
4175 case MEM_GOING_ONLINE:
4176 ret = slab_mem_going_online_callback(arg);
4177 break;
4178 case MEM_GOING_OFFLINE:
4179 ret = slab_mem_going_offline_callback(arg);
4180 break;
4181 case MEM_OFFLINE:
4182 case MEM_CANCEL_ONLINE:
4183 slab_mem_offline_callback(arg);
4184 break;
4185 case MEM_ONLINE:
4186 case MEM_CANCEL_OFFLINE:
4187 break;
4188 }
4189 if (ret)
4190 ret = notifier_from_errno(ret);
4191 else
4192 ret = NOTIFY_OK;
4193 return ret;
4194 }
4195
4196 static struct notifier_block slab_memory_callback_nb = {
4197 .notifier_call = slab_memory_callback,
4198 .priority = SLAB_CALLBACK_PRI,
4199 };
4200
4201
4202
4203
4204
4205
4206 /*
4207  * Used for early kmem_cache structures that were allocated using
4208  * the page allocator. Allocate them properly then fix up the pointers
4209  * that may be pointing to the wrong kmem_cache structure.
4210  */
4211 static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
4212 {
4213 int node;
4214 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
4215 struct kmem_cache_node *n;
4216
4217 memcpy(s, static_cache, kmem_cache->object_size);
4218
4219 /*
4220  * This runs very early and only the boot processor is supposed to be
4221  * up, so no cpu-slab flushing IPIs are possible yet; flush the boot
4222  * cpu's slab directly.
4223  */
4224 __flush_cpu_slab(s, smp_processor_id());
4225 for_each_kmem_cache_node(s, node, n) {
4226 struct page *p;
4227
4228 list_for_each_entry(p, &n->partial, slab_list)
4229 p->slab_cache = s;
4230
4231 #ifdef CONFIG_SLUB_DEBUG
4232 list_for_each_entry(p, &n->full, slab_list)
4233 p->slab_cache = s;
4234 #endif
4235 }
4236 slab_init_memcg_params(s);
4237 list_add(&s->list, &slab_caches);
4238 memcg_link_cache(s, NULL);
4239 return s;
4240 }
4241
4242 void __init kmem_cache_init(void)
4243 {
4244 static __initdata struct kmem_cache boot_kmem_cache,
4245 boot_kmem_cache_node;
4246
4247 if (debug_guardpage_minorder())
4248 slub_max_order = 0;
4249
4250 kmem_cache_node = &boot_kmem_cache_node;
4251 kmem_cache = &boot_kmem_cache;
4252
4253 create_boot_cache(kmem_cache_node, "kmem_cache_node",
4254 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
4255
4256 register_hotmemory_notifier(&slab_memory_callback_nb);
4257
4258
4259 slab_state = PARTIAL;
4260
4261 create_boot_cache(kmem_cache, "kmem_cache",
4262 offsetof(struct kmem_cache, node) +
4263 nr_node_ids * sizeof(struct kmem_cache_node *),
4264 SLAB_HWCACHE_ALIGN, 0, 0);
4265
4266 kmem_cache = bootstrap(&boot_kmem_cache);
4267 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4268
4269
4270 setup_kmalloc_cache_index_table();
4271 create_kmalloc_caches(0);
4272
4273
4274 init_freelist_randomization();
4275
4276 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4277 slub_cpu_dead);
4278
4279 pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
4280 cache_line_size(),
4281 slub_min_order, slub_max_order, slub_min_objects,
4282 nr_cpu_ids, nr_node_ids);
4283 }
4284
4285 void __init kmem_cache_init_late(void)
4286 {
4287 }
4288
4289 struct kmem_cache *
4290 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
4291 slab_flags_t flags, void (*ctor)(void *))
4292 {
4293 struct kmem_cache *s, *c;
4294
4295 s = find_mergeable(size, align, flags, name, ctor);
4296 if (s) {
4297 s->refcount++;
4298
4299
4300
4301
4302
4303 s->object_size = max(s->object_size, size);
4304 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
4305
4306 for_each_memcg_cache(c, s) {
4307 c->object_size = s->object_size;
4308 c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
4309 }
4310
4311 if (sysfs_slab_alias(s, name)) {
4312 s->refcount--;
4313 s = NULL;
4314 }
4315 }
4316
4317 return s;
4318 }
4319
4320 int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
4321 {
4322 int err;
4323
4324 err = kmem_cache_open(s, flags);
4325 if (err)
4326 return err;
4327
4328
4329 if (slab_state <= UP)
4330 return 0;
4331
4332 memcg_propagate_slab_attrs(s);
4333 err = sysfs_slab_add(s);
4334 if (err)
4335 __kmem_cache_release(s);
4336
4337 return err;
4338 }
4339
4340 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4341 {
4342 struct kmem_cache *s;
4343 void *ret;
4344
4345 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4346 return kmalloc_large(size, gfpflags);
4347
4348 s = kmalloc_slab(size, gfpflags);
4349
4350 if (unlikely(ZERO_OR_NULL_PTR(s)))
4351 return s;
4352
4353 ret = slab_alloc(s, gfpflags, caller);
4354
4355
4356 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4357
4358 return ret;
4359 }
4360
4361 #ifdef CONFIG_NUMA
4362 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4363 int node, unsigned long caller)
4364 {
4365 struct kmem_cache *s;
4366 void *ret;
4367
4368 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4369 ret = kmalloc_large_node(size, gfpflags, node);
4370
4371 trace_kmalloc_node(caller, ret,
4372 size, PAGE_SIZE << get_order(size),
4373 gfpflags, node);
4374
4375 return ret;
4376 }
4377
4378 s = kmalloc_slab(size, gfpflags);
4379
4380 if (unlikely(ZERO_OR_NULL_PTR(s)))
4381 return s;
4382
4383 ret = slab_alloc_node(s, gfpflags, node, caller);
4384
4385
4386 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4387
4388 return ret;
4389 }
4390 #endif
4391
4392 #ifdef CONFIG_SYSFS
4393 static int count_inuse(struct page *page)
4394 {
4395 return page->inuse;
4396 }
4397
4398 static int count_total(struct page *page)
4399 {
4400 return page->objects;
4401 }
4402 #endif
4403
4404 #ifdef CONFIG_SLUB_DEBUG
4405 static int validate_slab(struct kmem_cache *s, struct page *page,
4406 unsigned long *map)
4407 {
4408 void *p;
4409 void *addr = page_address(page);
4410
4411 if (!check_slab(s, page) ||
4412 !on_freelist(s, page, NULL))
4413 return 0;
4414
4415
4416 bitmap_zero(map, page->objects);
4417
4418 get_map(s, page, map);
4419 for_each_object(p, s, addr, page->objects) {
4420 if (test_bit(slab_index(p, s, addr), map))
4421 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4422 return 0;
4423 }
4424
4425 for_each_object(p, s, addr, page->objects)
4426 if (!test_bit(slab_index(p, s, addr), map))
4427 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4428 return 0;
4429 return 1;
4430 }
4431
4432 static void validate_slab_slab(struct kmem_cache *s, struct page *page,
4433 unsigned long *map)
4434 {
4435 slab_lock(page);
4436 validate_slab(s, page, map);
4437 slab_unlock(page);
4438 }
4439
4440 static int validate_slab_node(struct kmem_cache *s,
4441 struct kmem_cache_node *n, unsigned long *map)
4442 {
4443 unsigned long count = 0;
4444 struct page *page;
4445 unsigned long flags;
4446
4447 spin_lock_irqsave(&n->list_lock, flags);
4448
4449 list_for_each_entry(page, &n->partial, slab_list) {
4450 validate_slab_slab(s, page, map);
4451 count++;
4452 }
4453 if (count != n->nr_partial)
4454 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
4455 s->name, count, n->nr_partial);
4456
4457 if (!(s->flags & SLAB_STORE_USER))
4458 goto out;
4459
4460 list_for_each_entry(page, &n->full, slab_list) {
4461 validate_slab_slab(s, page, map);
4462 count++;
4463 }
4464 if (count != atomic_long_read(&n->nr_slabs))
4465 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
4466 s->name, count, atomic_long_read(&n->nr_slabs));
4467
4468 out:
4469 spin_unlock_irqrestore(&n->list_lock, flags);
4470 return count;
4471 }
4472
4473 static long validate_slab_cache(struct kmem_cache *s)
4474 {
4475 int node;
4476 unsigned long count = 0;
4477 struct kmem_cache_node *n;
4478 unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
4479
4480 if (!map)
4481 return -ENOMEM;
4482
4483 flush_all(s);
4484 for_each_kmem_cache_node(s, node, n)
4485 count += validate_slab_node(s, n, map);
4486 bitmap_free(map);
4487 return count;
4488 }
4489
4490 /*
4491  * Generate lists of code addresses where slabcache objects are allocated
4492  * and freed.
4493  */
4494 struct location {
4495 unsigned long count;
4496 unsigned long addr;
4497 long long sum_time;
4498 long min_time;
4499 long max_time;
4500 long min_pid;
4501 long max_pid;
4502 DECLARE_BITMAP(cpus, NR_CPUS);
4503 nodemask_t nodes;
4504 };
4505
4506 struct loc_track {
4507 unsigned long max;
4508 unsigned long count;
4509 struct location *loc;
4510 };
4511
4512 static void free_loc_track(struct loc_track *t)
4513 {
4514 if (t->max)
4515 free_pages((unsigned long)t->loc,
4516 get_order(sizeof(struct location) * t->max));
4517 }
4518
4519 static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4520 {
4521 struct location *l;
4522 int order;
4523
4524 order = get_order(sizeof(struct location) * max);
4525
4526 l = (void *)__get_free_pages(flags, order);
4527 if (!l)
4528 return 0;
4529
4530 if (t->count) {
4531 memcpy(l, t->loc, sizeof(struct location) * t->count);
4532 free_loc_track(t);
4533 }
4534 t->max = max;
4535 t->loc = l;
4536 return 1;
4537 }
4538
4539 static int add_location(struct loc_track *t, struct kmem_cache *s,
4540 const struct track *track)
4541 {
4542 long start, end, pos;
4543 struct location *l;
4544 unsigned long caddr;
4545 unsigned long age = jiffies - track->when;
4546
4547 start = -1;
4548 end = t->count;
4549
4550 for ( ; ; ) {
4551 pos = start + (end - start + 1) / 2;
4552
4553
4554
4555
4556
4557 if (pos == end)
4558 break;
4559
4560 caddr = t->loc[pos].addr;
4561 if (track->addr == caddr) {
4562
4563 l = &t->loc[pos];
4564 l->count++;
4565 if (track->when) {
4566 l->sum_time += age;
4567 if (age < l->min_time)
4568 l->min_time = age;
4569 if (age > l->max_time)
4570 l->max_time = age;
4571
4572 if (track->pid < l->min_pid)
4573 l->min_pid = track->pid;
4574 if (track->pid > l->max_pid)
4575 l->max_pid = track->pid;
4576
4577 cpumask_set_cpu(track->cpu,
4578 to_cpumask(l->cpus));
4579 }
4580 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4581 return 1;
4582 }
4583
4584 if (track->addr < caddr)
4585 end = pos;
4586 else
4587 start = pos;
4588 }
4589
4590 /*
4591  * Not found. Insert a new tracking element.
4592  */
4593 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4594 return 0;
4595
4596 l = t->loc + pos;
4597 if (pos < t->count)
4598 memmove(l + 1, l,
4599 (t->count - pos) * sizeof(struct location));
4600 t->count++;
4601 l->count = 1;
4602 l->addr = track->addr;
4603 l->sum_time = age;
4604 l->min_time = age;
4605 l->max_time = age;
4606 l->min_pid = track->pid;
4607 l->max_pid = track->pid;
4608 cpumask_clear(to_cpumask(l->cpus));
4609 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4610 nodes_clear(l->nodes);
4611 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4612 return 1;
4613 }
4614
4615 static void process_slab(struct loc_track *t, struct kmem_cache *s,
4616 struct page *page, enum track_item alloc,
4617 unsigned long *map)
4618 {
4619 void *addr = page_address(page);
4620 void *p;
4621
4622 bitmap_zero(map, page->objects);
4623 get_map(s, page, map);
4624
4625 for_each_object(p, s, addr, page->objects)
4626 if (!test_bit(slab_index(p, s, addr), map))
4627 add_location(t, s, get_track(s, p, alloc));
4628 }
4629
4630 static int list_locations(struct kmem_cache *s, char *buf,
4631 enum track_item alloc)
4632 {
4633 int len = 0;
4634 unsigned long i;
4635 struct loc_track t = { 0, 0, NULL };
4636 int node;
4637 struct kmem_cache_node *n;
4638 unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
4639
4640 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4641 GFP_KERNEL)) {
4642 bitmap_free(map);
4643 return sprintf(buf, "Out of memory\n");
4644 }
4645
4646 flush_all(s);
4647
4648 for_each_kmem_cache_node(s, node, n) {
4649 unsigned long flags;
4650 struct page *page;
4651
4652 if (!atomic_long_read(&n->nr_slabs))
4653 continue;
4654
4655 spin_lock_irqsave(&n->list_lock, flags);
4656 list_for_each_entry(page, &n->partial, slab_list)
4657 process_slab(&t, s, page, alloc, map);
4658 list_for_each_entry(page, &n->full, slab_list)
4659 process_slab(&t, s, page, alloc, map);
4660 spin_unlock_irqrestore(&n->list_lock, flags);
4661 }
4662
4663 for (i = 0; i < t.count; i++) {
4664 struct location *l = &t.loc[i];
4665
4666 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4667 break;
4668 len += sprintf(buf + len, "%7ld ", l->count);
4669
4670 if (l->addr)
4671 len += sprintf(buf + len, "%pS", (void *)l->addr);
4672 else
4673 len += sprintf(buf + len, "<not-available>");
4674
4675 if (l->sum_time != l->min_time) {
4676 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4677 l->min_time,
4678 (long)div_u64(l->sum_time, l->count),
4679 l->max_time);
4680 } else
4681 len += sprintf(buf + len, " age=%ld",
4682 l->min_time);
4683
4684 if (l->min_pid != l->max_pid)
4685 len += sprintf(buf + len, " pid=%ld-%ld",
4686 l->min_pid, l->max_pid);
4687 else
4688 len += sprintf(buf + len, " pid=%ld",
4689 l->min_pid);
4690
4691 if (num_online_cpus() > 1 &&
4692 !cpumask_empty(to_cpumask(l->cpus)) &&
4693 len < PAGE_SIZE - 60)
4694 len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4695 " cpus=%*pbl",
4696 cpumask_pr_args(to_cpumask(l->cpus)));
4697
4698 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4699 len < PAGE_SIZE - 60)
4700 len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4701 " nodes=%*pbl",
4702 nodemask_pr_args(&l->nodes));
4703
4704 len += sprintf(buf + len, "\n");
4705 }
4706
4707 free_loc_track(&t);
4708 bitmap_free(map);
4709 if (!t.count)
4710 len += sprintf(buf, "No data\n");
4711 return len;
4712 }
4713 #endif
4714
4715 #ifdef SLUB_RESILIENCY_TEST
4716 static void __init resiliency_test(void)
4717 {
4718 u8 *p;
4719 int type = KMALLOC_NORMAL;
4720
4721 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4722
4723 pr_err("SLUB resiliency testing\n");
4724 pr_err("-----------------------\n");
4725 pr_err("A. Corruption after allocation\n");
4726
4727 p = kzalloc(16, GFP_KERNEL);
4728 p[16] = 0x12;
4729 pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
4730 p + 16);
4731
4732 validate_slab_cache(kmalloc_caches[type][4]);
4733
4734
4735 p = kzalloc(32, GFP_KERNEL);
4736 p[32 + sizeof(void *)] = 0x34;
4737 pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
4738 p);
4739 pr_err("If allocated object is overwritten then not detectable\n\n");
4740
4741 validate_slab_cache(kmalloc_caches[type][5]);
4742 p = kzalloc(64, GFP_KERNEL);
4743 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4744 *p = 0x56;
4745 pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4746 p);
4747 pr_err("If allocated object is overwritten then not detectable\n\n");
4748 validate_slab_cache(kmalloc_caches[type][6]);
4749
4750 pr_err("\nB. Corruption after free\n");
4751 p = kzalloc(128, GFP_KERNEL);
4752 kfree(p);
4753 *p = 0x78;
4754 pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4755 validate_slab_cache(kmalloc_caches[type][7]);
4756
4757 p = kzalloc(256, GFP_KERNEL);
4758 kfree(p);
4759 p[50] = 0x9a;
4760 pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
4761 validate_slab_cache(kmalloc_caches[type][8]);
4762
4763 p = kzalloc(512, GFP_KERNEL);
4764 kfree(p);
4765 p[512] = 0xab;
4766 pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4767 validate_slab_cache(kmalloc_caches[type][9]);
4768 }
4769 #else
4770 #ifdef CONFIG_SYSFS
4771 static void resiliency_test(void) {};
4772 #endif
4773 #endif
4774
4775 #ifdef CONFIG_SYSFS
4776 enum slab_stat_type {
4777 SL_ALL,
4778 SL_PARTIAL,
4779 SL_CPU,
4780 SL_OBJECTS,
4781 SL_TOTAL
4782 };
4783
4784 #define SO_ALL (1 << SL_ALL)
4785 #define SO_PARTIAL (1 << SL_PARTIAL)
4786 #define SO_CPU (1 << SL_CPU)
4787 #define SO_OBJECTS (1 << SL_OBJECTS)
4788 #define SO_TOTAL (1 << SL_TOTAL)
4789
4790 #ifdef CONFIG_MEMCG
4791 static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON);
4792
4793 static int __init setup_slub_memcg_sysfs(char *str)
4794 {
4795 int v;
4796
4797 if (get_option(&str, &v) > 0)
4798 memcg_sysfs_enabled = v;
4799
4800 return 1;
4801 }
4802
4803 __setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs);
4804 #endif
4805
4806 static ssize_t show_slab_objects(struct kmem_cache *s,
4807 char *buf, unsigned long flags)
4808 {
4809 unsigned long total = 0;
4810 int node;
4811 int x;
4812 unsigned long *nodes;
4813
4814 nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
4815 if (!nodes)
4816 return -ENOMEM;
4817
4818 if (flags & SO_CPU) {
4819 int cpu;
4820
4821 for_each_possible_cpu(cpu) {
4822 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
4823 cpu);
4824 int node;
4825 struct page *page;
4826
4827 page = READ_ONCE(c->page);
4828 if (!page)
4829 continue;
4830
4831 node = page_to_nid(page);
4832 if (flags & SO_TOTAL)
4833 x = page->objects;
4834 else if (flags & SO_OBJECTS)
4835 x = page->inuse;
4836 else
4837 x = 1;
4838
4839 total += x;
4840 nodes[node] += x;
4841
4842 page = slub_percpu_partial_read_once(c);
4843 if (page) {
4844 node = page_to_nid(page);
4845 if (flags & SO_TOTAL)
4846 WARN_ON_ONCE(1);
4847 else if (flags & SO_OBJECTS)
4848 WARN_ON_ONCE(1);
4849 else
4850 x = page->pages;
4851 total += x;
4852 nodes[node] += x;
4853 }
4854 }
4855 }
4856
4857 /*
4858  * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
4859  * already held, which would conflict with an existing lock order:
4860  *
4861  * mem_hotplug_lock->slab_mutex->kernfs_mutex
4862  *
4863  * We don't really need mem_hotplug_lock (to hold off
4864  * slab_mem_going_offline_callback) here because slab's memory hot
4865  * unplug code doesn't destroy the kmem_cache->node[] data.
4866  *
4867  */
4868 #ifdef CONFIG_SLUB_DEBUG
4869 if (flags & SO_ALL) {
4870 struct kmem_cache_node *n;
4871
4872 for_each_kmem_cache_node(s, node, n) {
4873
4874 if (flags & SO_TOTAL)
4875 x = atomic_long_read(&n->total_objects);
4876 else if (flags & SO_OBJECTS)
4877 x = atomic_long_read(&n->total_objects) -
4878 count_partial(n, count_free);
4879 else
4880 x = atomic_long_read(&n->nr_slabs);
4881 total += x;
4882 nodes[node] += x;
4883 }
4884
4885 } else
4886 #endif
4887 if (flags & SO_PARTIAL) {
4888 struct kmem_cache_node *n;
4889
4890 for_each_kmem_cache_node(s, node, n) {
4891 if (flags & SO_TOTAL)
4892 x = count_partial(n, count_total);
4893 else if (flags & SO_OBJECTS)
4894 x = count_partial(n, count_inuse);
4895 else
4896 x = n->nr_partial;
4897 total += x;
4898 nodes[node] += x;
4899 }
4900 }
4901 x = sprintf(buf, "%lu", total);
4902 #ifdef CONFIG_NUMA
4903 for (node = 0; node < nr_node_ids; node++)
4904 if (nodes[node])
4905 x += sprintf(buf + x, " N%d=%lu",
4906 node, nodes[node]);
4907 #endif
4908 kfree(nodes);
4909 return x + sprintf(buf + x, "\n");
4910 }
4911
4912 #ifdef CONFIG_SLUB_DEBUG
4913 static int any_slab_objects(struct kmem_cache *s)
4914 {
4915 int node;
4916 struct kmem_cache_node *n;
4917
4918 for_each_kmem_cache_node(s, node, n)
4919 if (atomic_long_read(&n->total_objects))
4920 return 1;
4921
4922 return 0;
4923 }
4924 #endif
4925
4926 #define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4927 #define to_slab(n) container_of(n, struct kmem_cache, kobj)
4928
4929 struct slab_attribute {
4930 struct attribute attr;
4931 ssize_t (*show)(struct kmem_cache *s, char *buf);
4932 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4933 };
4934
4935 #define SLAB_ATTR_RO(_name) \
4936 static struct slab_attribute _name##_attr = \
4937 __ATTR(_name, 0400, _name##_show, NULL)
4938
4939 #define SLAB_ATTR(_name) \
4940 static struct slab_attribute _name##_attr = \
4941 __ATTR(_name, 0600, _name##_show, _name##_store)
4942
4943 static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4944 {
4945 return sprintf(buf, "%u\n", s->size);
4946 }
4947 SLAB_ATTR_RO(slab_size);
4948
4949 static ssize_t align_show(struct kmem_cache *s, char *buf)
4950 {
4951 return sprintf(buf, "%u\n", s->align);
4952 }
4953 SLAB_ATTR_RO(align);
4954
4955 static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4956 {
4957 return sprintf(buf, "%u\n", s->object_size);
4958 }
4959 SLAB_ATTR_RO(object_size);
4960
4961 static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4962 {
4963 return sprintf(buf, "%u\n", oo_objects(s->oo));
4964 }
4965 SLAB_ATTR_RO(objs_per_slab);
4966
4967 static ssize_t order_store(struct kmem_cache *s,
4968 const char *buf, size_t length)
4969 {
4970 unsigned int order;
4971 int err;
4972
4973 err = kstrtouint(buf, 10, &order);
4974 if (err)
4975 return err;
4976
4977 if (order > slub_max_order || order < slub_min_order)
4978 return -EINVAL;
4979
4980 calculate_sizes(s, order);
4981 return length;
4982 }
4983
4984 static ssize_t order_show(struct kmem_cache *s, char *buf)
4985 {
4986 return sprintf(buf, "%u\n", oo_order(s->oo));
4987 }
4988 SLAB_ATTR(order);
4989
4990 static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4991 {
4992 return sprintf(buf, "%lu\n", s->min_partial);
4993 }
4994
4995 static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4996 size_t length)
4997 {
4998 unsigned long min;
4999 int err;
5000
5001 err = kstrtoul(buf, 10, &min);
5002 if (err)
5003 return err;
5004
5005 set_min_partial(s, min);
5006 return length;
5007 }
5008 SLAB_ATTR(min_partial);
5009
5010 static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
5011 {
5012 return sprintf(buf, "%u\n", slub_cpu_partial(s));
5013 }
5014
5015 static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
5016 size_t length)
5017 {
5018 unsigned int objects;
5019 int err;
5020
5021 err = kstrtouint(buf, 10, &objects);
5022 if (err)
5023 return err;
5024 if (objects && !kmem_cache_has_cpu_partial(s))
5025 return -EINVAL;
5026
5027 slub_set_cpu_partial(s, objects);
5028 flush_all(s);
5029 return length;
5030 }
5031 SLAB_ATTR(cpu_partial);
5032
5033 static ssize_t ctor_show(struct kmem_cache *s, char *buf)
5034 {
5035 if (!s->ctor)
5036 return 0;
5037 return sprintf(buf, "%pS\n", s->ctor);
5038 }
5039 SLAB_ATTR_RO(ctor);
5040
5041 static ssize_t aliases_show(struct kmem_cache *s, char *buf)
5042 {
5043 return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
5044 }
5045 SLAB_ATTR_RO(aliases);
5046
5047 static ssize_t partial_show(struct kmem_cache *s, char *buf)
5048 {
5049 return show_slab_objects(s, buf, SO_PARTIAL);
5050 }
5051 SLAB_ATTR_RO(partial);
5052
5053 static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
5054 {
5055 return show_slab_objects(s, buf, SO_CPU);
5056 }
5057 SLAB_ATTR_RO(cpu_slabs);
5058
5059 static ssize_t objects_show(struct kmem_cache *s, char *buf)
5060 {
5061 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
5062 }
5063 SLAB_ATTR_RO(objects);
5064
5065 static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
5066 {
5067 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
5068 }
5069 SLAB_ATTR_RO(objects_partial);
5070
5071 static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
5072 {
5073 int objects = 0;
5074 int pages = 0;
5075 int cpu;
5076 int len;
5077
5078 for_each_online_cpu(cpu) {
5079 struct page *page;
5080
5081 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5082
5083 if (page) {
5084 pages += page->pages;
5085 objects += page->pobjects;
5086 }
5087 }
5088
5089 len = sprintf(buf, "%d(%d)", objects, pages);
5090
5091 #ifdef CONFIG_SMP
5092 for_each_online_cpu(cpu) {
5093 struct page *page;
5094
5095 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5096
5097 if (page && len < PAGE_SIZE - 20)
5098 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
5099 page->pobjects, page->pages);
5100 }
5101 #endif
5102 return len + sprintf(buf + len, "\n");
5103 }
5104 SLAB_ATTR_RO(slabs_cpu_partial);
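/*
 * Illustrative output only (values made up): on SMP the file reads back
 * as a total followed by per-cpu pairs, e.g.
 *
 *	36(2) C0=18(1) C1=18(1)
 *
 * i.e. "objects(pages)" overall, then " C<cpu>=objects(pages)" per cpu,
 * matching the sprintf() formats above.
 */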
5105
5106 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
5107 {
5108 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
5109 }
5110
5111 static ssize_t reclaim_account_store(struct kmem_cache *s,
5112 const char *buf, size_t length)
5113 {
5114 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
5115 if (buf[0] == '1')
5116 s->flags |= SLAB_RECLAIM_ACCOUNT;
5117 return length;
5118 }
5119 SLAB_ATTR(reclaim_account);
5120
5121 static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
5122 {
5123 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
5124 }
5125 SLAB_ATTR_RO(hwcache_align);
5126
5127 #ifdef CONFIG_ZONE_DMA
5128 static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5129 {
5130 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5131 }
5132 SLAB_ATTR_RO(cache_dma);
5133 #endif
5134
5135 static ssize_t usersize_show(struct kmem_cache *s, char *buf)
5136 {
5137 return sprintf(buf, "%u\n", s->usersize);
5138 }
5139 SLAB_ATTR_RO(usersize);
5140
5141 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5142 {
5143 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
5144 }
5145 SLAB_ATTR_RO(destroy_by_rcu);
5146
5147 #ifdef CONFIG_SLUB_DEBUG
5148 static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5149 {
5150 return show_slab_objects(s, buf, SO_ALL);
5151 }
5152 SLAB_ATTR_RO(slabs);
5153
5154 static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5155 {
5156 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5157 }
5158 SLAB_ATTR_RO(total_objects);
5159
5160 static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5161 {
5162 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
5163 }
5164
5165 static ssize_t sanity_checks_store(struct kmem_cache *s,
5166 const char *buf, size_t length)
5167 {
5168 s->flags &= ~SLAB_CONSISTENCY_CHECKS;
5169 if (buf[0] == '1') {
5170 s->flags &= ~__CMPXCHG_DOUBLE;
5171 s->flags |= SLAB_CONSISTENCY_CHECKS;
5172 }
5173 return length;
5174 }
5175 SLAB_ATTR(sanity_checks);
5176
5177 static ssize_t trace_show(struct kmem_cache *s, char *buf)
5178 {
5179 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5180 }
5181
5182 static ssize_t trace_store(struct kmem_cache *s, const char *buf,
5183 size_t length)
5184 {
5185 /*
5186  * Tracing a merged cache is going to give confusing results
5187  * as well as cause other issues like converting a mergeable
5188  * cache into an umergeable one.
5189  */
5190 if (s->refcount > 1)
5191 return -EINVAL;
5192
5193 s->flags &= ~SLAB_TRACE;
5194 if (buf[0] == '1') {
5195 s->flags &= ~__CMPXCHG_DOUBLE;
5196 s->flags |= SLAB_TRACE;
5197 }
5198 return length;
5199 }
5200 SLAB_ATTR(trace);
5201
5202 static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
5203 {
5204 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
5205 }
5206
5207 static ssize_t red_zone_store(struct kmem_cache *s,
5208 const char *buf, size_t length)
5209 {
5210 if (any_slab_objects(s))
5211 return -EBUSY;
5212
5213 s->flags &= ~SLAB_RED_ZONE;
5214 if (buf[0] == '1') {
5215 s->flags |= SLAB_RED_ZONE;
5216 }
5217 calculate_sizes(s, -1);
5218 return length;
5219 }
5220 SLAB_ATTR(red_zone);
5221
5222 static ssize_t poison_show(struct kmem_cache *s, char *buf)
5223 {
5224 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
5225 }
5226
5227 static ssize_t poison_store(struct kmem_cache *s,
5228 const char *buf, size_t length)
5229 {
5230 if (any_slab_objects(s))
5231 return -EBUSY;
5232
5233 s->flags &= ~SLAB_POISON;
5234 if (buf[0] == '1') {
5235 s->flags |= SLAB_POISON;
5236 }
5237 calculate_sizes(s, -1);
5238 return length;
5239 }
5240 SLAB_ATTR(poison);
5241
5242 static ssize_t store_user_show(struct kmem_cache *s, char *buf)
5243 {
5244 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
5245 }
5246
5247 static ssize_t store_user_store(struct kmem_cache *s,
5248 const char *buf, size_t length)
5249 {
5250 if (any_slab_objects(s))
5251 return -EBUSY;
5252
5253 s->flags &= ~SLAB_STORE_USER;
5254 if (buf[0] == '1') {
5255 s->flags &= ~__CMPXCHG_DOUBLE;
5256 s->flags |= SLAB_STORE_USER;
5257 }
5258 calculate_sizes(s, -1);
5259 return length;
5260 }
5261 SLAB_ATTR(store_user);
5262
5263 static ssize_t validate_show(struct kmem_cache *s, char *buf)
5264 {
5265 return 0;
5266 }
5267
5268 static ssize_t validate_store(struct kmem_cache *s,
5269 const char *buf, size_t length)
5270 {
5271 int ret = -EINVAL;
5272
5273 if (buf[0] == '1') {
5274 ret = validate_slab_cache(s);
5275 if (ret >= 0)
5276 ret = length;
5277 }
5278 return ret;
5279 }
5280 SLAB_ATTR(validate);
5281
5282 static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
5283 {
5284 if (!(s->flags & SLAB_STORE_USER))
5285 return -ENOSYS;
5286 return list_locations(s, buf, TRACK_ALLOC);
5287 }
5288 SLAB_ATTR_RO(alloc_calls);
5289
5290 static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
5291 {
5292 if (!(s->flags & SLAB_STORE_USER))
5293 return -ENOSYS;
5294 return list_locations(s, buf, TRACK_FREE);
5295 }
5296 SLAB_ATTR_RO(free_calls);
5297 #endif
5298
5299 #ifdef CONFIG_FAILSLAB
5300 static ssize_t failslab_show(struct kmem_cache *s, char *buf)
5301 {
5302 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
5303 }
5304
5305 static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
5306 size_t length)
5307 {
5308 if (s->refcount > 1)
5309 return -EINVAL;
5310
5311 s->flags &= ~SLAB_FAILSLAB;
5312 if (buf[0] == '1')
5313 s->flags |= SLAB_FAILSLAB;
5314 return length;
5315 }
5316 SLAB_ATTR(failslab);
5317 #endif
5318
5319 static ssize_t shrink_show(struct kmem_cache *s, char *buf)
5320 {
5321 return 0;
5322 }
5323
5324 static ssize_t shrink_store(struct kmem_cache *s,
5325 const char *buf, size_t length)
5326 {
5327 if (buf[0] == '1')
5328 kmem_cache_shrink_all(s);
5329 else
5330 return -EINVAL;
5331 return length;
5332 }
5333 SLAB_ATTR(shrink);
5334
5335 #ifdef CONFIG_NUMA
5336 static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
5337 {
5338 return sprintf(buf, "%u\n", s->remote_node_defrag_ratio / 10);
5339 }
5340
5341 static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5342 const char *buf, size_t length)
5343 {
5344 unsigned int ratio;
5345 int err;
5346
5347 err = kstrtouint(buf, 10, &ratio);
5348 if (err)
5349 return err;
5350 if (ratio > 100)
5351 return -ERANGE;
5352
5353 s->remote_node_defrag_ratio = ratio * 10;
5354
5355 return length;
5356 }
5357 SLAB_ATTR(remote_node_defrag_ratio);
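/*
 * Note the factor of ten above: the sysfs file works in whole percent
 * (0-100), while s->remote_node_defrag_ratio is kept internally in tenths
 * of a percent, hence the "/ 10" on show and "* 10" on store.
 */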
5358 #endif
5359
5360 #ifdef CONFIG_SLUB_STATS
5361 static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5362 {
5363 unsigned long sum = 0;
5364 int cpu;
5365 int len;
5366 int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);
5367
5368 if (!data)
5369 return -ENOMEM;
5370
5371 for_each_online_cpu(cpu) {
5372 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5373
5374 data[cpu] = x;
5375 sum += x;
5376 }
5377
5378 len = sprintf(buf, "%lu", sum);
5379
5380 #ifdef CONFIG_SMP
5381 for_each_online_cpu(cpu) {
5382 if (data[cpu] && len < PAGE_SIZE - 20)
5383 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
5384 }
5385 #endif
5386 kfree(data);
5387 return len + sprintf(buf + len, "\n");
5388 }
5389
5390 static void clear_stat(struct kmem_cache *s, enum stat_item si)
5391 {
5392 int cpu;
5393
5394 for_each_online_cpu(cpu)
5395 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5396 }
5397
5398 #define STAT_ATTR(si, text) \
5399 static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5400 { \
5401 return show_stat(s, buf, si); \
5402 } \
5403 static ssize_t text##_store(struct kmem_cache *s, \
5404 const char *buf, size_t length) \
5405 { \
5406 if (buf[0] != '0') \
5407 return -EINVAL; \
5408 clear_stat(s, si); \
5409 return length; \
5410 } \
5411 SLAB_ATTR(text);
5412
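/*
 * Each STAT_ATTR() invocation below therefore creates a read-write file,
 * e.g. /sys/kernel/slab/<cache>/alloc_fastpath: reading prints the sum
 * over online cpus plus a per-cpu breakdown (" C<cpu>=<count>" on SMP),
 * and writing "0" clears the counter on every cpu via clear_stat();
 * anything else returns -EINVAL.
 */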
5413 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5414 STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5415 STAT_ATTR(FREE_FASTPATH, free_fastpath);
5416 STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5417 STAT_ATTR(FREE_FROZEN, free_frozen);
5418 STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5419 STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5420 STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5421 STAT_ATTR(ALLOC_SLAB, alloc_slab);
5422 STAT_ATTR(ALLOC_REFILL, alloc_refill);
5423 STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5424 STAT_ATTR(FREE_SLAB, free_slab);
5425 STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5426 STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5427 STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5428 STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5429 STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5430 STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5431 STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5432 STAT_ATTR(ORDER_FALLBACK, order_fallback);
5433 STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5434 STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5435 STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5436 STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5437 STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
5438 STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5439 #endif
5440
5441 static struct attribute *slab_attrs[] = {
5442 &slab_size_attr.attr,
5443 &object_size_attr.attr,
5444 &objs_per_slab_attr.attr,
5445 &order_attr.attr,
5446 &min_partial_attr.attr,
5447 &cpu_partial_attr.attr,
5448 &objects_attr.attr,
5449 &objects_partial_attr.attr,
5450 &partial_attr.attr,
5451 &cpu_slabs_attr.attr,
5452 &ctor_attr.attr,
5453 &aliases_attr.attr,
5454 &align_attr.attr,
5455 &hwcache_align_attr.attr,
5456 &reclaim_account_attr.attr,
5457 &destroy_by_rcu_attr.attr,
5458 &shrink_attr.attr,
5459 &slabs_cpu_partial_attr.attr,
5460 #ifdef CONFIG_SLUB_DEBUG
5461 &total_objects_attr.attr,
5462 &slabs_attr.attr,
5463 &sanity_checks_attr.attr,
5464 &trace_attr.attr,
5465 &red_zone_attr.attr,
5466 &poison_attr.attr,
5467 &store_user_attr.attr,
5468 &validate_attr.attr,
5469 &alloc_calls_attr.attr,
5470 &free_calls_attr.attr,
5471 #endif
5472 #ifdef CONFIG_ZONE_DMA
5473 &cache_dma_attr.attr,
5474 #endif
5475 #ifdef CONFIG_NUMA
5476 &remote_node_defrag_ratio_attr.attr,
5477 #endif
5478 #ifdef CONFIG_SLUB_STATS
5479 &alloc_fastpath_attr.attr,
5480 &alloc_slowpath_attr.attr,
5481 &free_fastpath_attr.attr,
5482 &free_slowpath_attr.attr,
5483 &free_frozen_attr.attr,
5484 &free_add_partial_attr.attr,
5485 &free_remove_partial_attr.attr,
5486 &alloc_from_partial_attr.attr,
5487 &alloc_slab_attr.attr,
5488 &alloc_refill_attr.attr,
5489 &alloc_node_mismatch_attr.attr,
5490 &free_slab_attr.attr,
5491 &cpuslab_flush_attr.attr,
5492 &deactivate_full_attr.attr,
5493 &deactivate_empty_attr.attr,
5494 &deactivate_to_head_attr.attr,
5495 &deactivate_to_tail_attr.attr,
5496 &deactivate_remote_frees_attr.attr,
5497 &deactivate_bypass_attr.attr,
5498 &order_fallback_attr.attr,
5499 &cmpxchg_double_fail_attr.attr,
5500 &cmpxchg_double_cpu_fail_attr.attr,
5501 &cpu_partial_alloc_attr.attr,
5502 &cpu_partial_free_attr.attr,
5503 &cpu_partial_node_attr.attr,
5504 &cpu_partial_drain_attr.attr,
5505 #endif
5506 #ifdef CONFIG_FAILSLAB
5507 &failslab_attr.attr,
5508 #endif
5509 &usersize_attr.attr,
5510
5511 NULL
5512 };
5513
5514 static const struct attribute_group slab_attr_group = {
5515 .attrs = slab_attrs,
5516 };
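/*
 * This attribute group is what userspace sees as the per-cache sysfs
 * directory. Illustrative shell usage (cache names and values depend on
 * the running system):
 *
 *	# cat /sys/kernel/slab/kmalloc-64/order
 *	0
 *	# echo 1 > /sys/kernel/slab/kmalloc-64/shrink
 *
 * Reads are dispatched through slab_attr_show() and writes through
 * slab_attr_store() below.
 */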
5517
5518 static ssize_t slab_attr_show(struct kobject *kobj,
5519 struct attribute *attr,
5520 char *buf)
5521 {
5522 struct slab_attribute *attribute;
5523 struct kmem_cache *s;
5524 int err;
5525
5526 attribute = to_slab_attr(attr);
5527 s = to_slab(kobj);
5528
5529 if (!attribute->show)
5530 return -EIO;
5531
5532 err = attribute->show(s, buf);
5533
5534 return err;
5535 }
5536
5537 static ssize_t slab_attr_store(struct kobject *kobj,
5538 struct attribute *attr,
5539 const char *buf, size_t len)
5540 {
5541 struct slab_attribute *attribute;
5542 struct kmem_cache *s;
5543 int err;
5544
5545 attribute = to_slab_attr(attr);
5546 s = to_slab(kobj);
5547
5548 if (!attribute->store)
5549 return -EIO;
5550
5551 err = attribute->store(s, buf, len);
5552 #ifdef CONFIG_MEMCG
5553 if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
5554 struct kmem_cache *c;
5555
5556 mutex_lock(&slab_mutex);
5557 if (s->max_attr_size < len)
5558 s->max_attr_size = len;
5559
5560 /*
5561  * This is a best effort propagation, so this function's return
5562  * value will be determined by the parent cache only. This is
5563  * basically because not all attributes will have a well
5564  * defined semantics for rollbacks - most of the actions will
5565  * have permanent effects.
5566  *
5567  * Returning the error value of any of the children that fail
5568  * is not 100 % defined, in the sense that users seeing the
5569  * error code won't be able to know anything about the state of
5570  * the cache.
5571  *
5572  * Only returning the error code for the parent cache at least
5573  * has well defined semantics. The cache being written to
5574  * directly won't actually be affected by this, since we're
5575  * propagating the value and setting the attr to len is a success.
5576  */
5577 for_each_memcg_cache(c, s)
5578 attribute->store(c, buf, len);
5579 mutex_unlock(&slab_mutex);
5580 }
5581 #endif
5582 return err;
5583 }
5584
5585 static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5586 {
5587 #ifdef CONFIG_MEMCG
5588 int i;
5589 char *buffer = NULL;
5590 struct kmem_cache *root_cache;
5591
5592 if (is_root_cache(s))
5593 return;
5594
5595 root_cache = s->memcg_params.root_cache;
5596
5597 /*
5598  * This means the root cache had no attribute written, so there is
5599  * no point in propagating a copy over.
5600  */
5601 if (!root_cache->max_attr_size)
5602 return;
5603
5604 for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
5605 char mbuf[64];
5606 char *buf;
5607 struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
5608 ssize_t len;
5609
5610 if (!attr || !attr->store || !attr->show)
5611 continue;
5612
5613 /*
5614  * It is really bad that we have to allocate here, so we will
5615  * do it only as a fallback. If we actually allocate, though,
5616  * we can just use the allocated buffer until the end.
5617  *
5618  * Most of the slub attributes will tend to be very small in
5619  * size, but sysfs allows buffers up to a page, so they can
5620  * theoretically happen.
5621  */
5622 if (buffer)
5623 buf = buffer;
5624 else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
5625 buf = mbuf;
5626 else {
5627 buffer = (char *) get_zeroed_page(GFP_KERNEL);
5628 if (WARN_ON(!buffer))
5629 continue;
5630 buf = buffer;
5631 }
5632
5633 len = attr->show(root_cache, buf);
5634 if (len > 0)
5635 attr->store(s, buf, len);
5636 }
5637
5638 if (buffer)
5639 free_page((unsigned long)buffer);
5640 #endif
5641 }
5642
5643 static void kmem_cache_release(struct kobject *k)
5644 {
5645 slab_kmem_cache_release(to_slab(k));
5646 }
5647
5648 static const struct sysfs_ops slab_sysfs_ops = {
5649 .show = slab_attr_show,
5650 .store = slab_attr_store,
5651 };
5652
5653 static struct kobj_type slab_ktype = {
5654 .sysfs_ops = &slab_sysfs_ops,
5655 .release = kmem_cache_release,
5656 };
5657
5658 static int uevent_filter(struct kset *kset, struct kobject *kobj)
5659 {
5660 struct kobj_type *ktype = get_ktype(kobj);
5661
5662 if (ktype == &slab_ktype)
5663 return 1;
5664 return 0;
5665 }
5666
5667 static const struct kset_uevent_ops slab_uevent_ops = {
5668 .filter = uevent_filter,
5669 };
5670
5671 static struct kset *slab_kset;
5672
5673 static inline struct kset *cache_kset(struct kmem_cache *s)
5674 {
5675 #ifdef CONFIG_MEMCG
5676 if (!is_root_cache(s))
5677 return s->memcg_params.root_cache->memcg_kset;
5678 #endif
5679 return slab_kset;
5680 }
5681
5682 #define ID_STR_LENGTH 64
5683
5684 /* Create a unique string id for a slab cache:
5685  *
5686  * Format	:[flags-]size
5687  */
5688 static char *create_unique_id(struct kmem_cache *s)
5689 {
5690 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5691 char *p = name;
5692
5693 BUG_ON(!name);
5694
5695 *p++ = ':';
5696
5697 /*
5698  * First come the flags affecting slabcache operations. We will only
5699  * get here for aliasable slabs so we do not need to support
5700  * the mixture of slabs and the DMA attribute separately.
5701  * Any flag characters are followed by '-' and the zero-padded size.
5702  */
5703 if (s->flags & SLAB_CACHE_DMA)
5704 *p++ = 'd';
5705 if (s->flags & SLAB_CACHE_DMA32)
5706 *p++ = 'D';
5707 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5708 *p++ = 'a';
5709 if (s->flags & SLAB_CONSISTENCY_CHECKS)
5710 *p++ = 'F';
5711 if (s->flags & SLAB_ACCOUNT)
5712 *p++ = 'A';
5713 if (p != name + 1)
5714 *p++ = '-';
5715 p += sprintf(p, "%07u", s->size);
5716
5717 BUG_ON(p > name + ID_STR_LENGTH - 1);
5718 return name;
5719 }
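/*
 * Illustrative ids only: a cache with SLAB_CACHE_DMA and SLAB_ACCOUNT set
 * and s->size == 4096 gets ":dA-0004096"; a cache with none of the flag
 * characters skips the '-' and becomes e.g. ":0000192" for s->size == 192.
 */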
5720
5721 static void sysfs_slab_remove_workfn(struct work_struct *work)
5722 {
5723 struct kmem_cache *s =
5724 container_of(work, struct kmem_cache, kobj_remove_work);
5725
5726 if (!s->kobj.state_in_sysfs)
5727 /*
5728  * For a memcg cache, this may be called during
5729  * deactivation and again on shutdown.  Remove only once.
5730  * A cache is never shut down before deactivation is
5731  * complete, so no need to worry about synchronization.
5732  */
5733 goto out;
5734
5735 #ifdef CONFIG_MEMCG
5736 kset_unregister(s->memcg_kset);
5737 #endif
5738 kobject_uevent(&s->kobj, KOBJ_REMOVE);
5739 out:
5740 kobject_put(&s->kobj);
5741 }
5742
5743 static int sysfs_slab_add(struct kmem_cache *s)
5744 {
5745 int err;
5746 const char *name;
5747 struct kset *kset = cache_kset(s);
5748 int unmergeable = slab_unmergeable(s);
5749
5750 INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
5751
5752 if (!kset) {
5753 kobject_init(&s->kobj, &slab_ktype);
5754 return 0;
5755 }
5756
5757 if (!unmergeable && disable_higher_order_debug &&
5758 (slub_debug & DEBUG_METADATA_FLAGS))
5759 unmergeable = 1;
5760
5761 if (unmergeable) {
5762 /*
5763  * Slabcache can never be merged so we can use the name proper.
5764  * This is typically the case for debug situations. In that
5765  * case we can catch duplicate names easily.
5766  */
5767 sysfs_remove_link(&slab_kset->kobj, s->name);
5768 name = s->name;
5769 } else {
5770 /*
5771  * Create a unique name for the slab as a target
5772  * for the symlinks.
5773  */
5774 name = create_unique_id(s);
5775 }
5776
5777 s->kobj.kset = kset;
5778 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
5779 if (err) {
5780 kobject_put(&s->kobj);
5781 goto out;
5782 }
5783
5784 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5785 if (err)
5786 goto out_del_kobj;
5787
5788 #ifdef CONFIG_MEMCG
5789 if (is_root_cache(s) && memcg_sysfs_enabled) {
5790 s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
5791 if (!s->memcg_kset) {
5792 err = -ENOMEM;
5793 goto out_del_kobj;
5794 }
5795 }
5796 #endif
5797
5798 kobject_uevent(&s->kobj, KOBJ_ADD);
5799 if (!unmergeable) {
5800 /* Setup first alias */
5801 sysfs_slab_alias(s, s->name);
5802 }
5803 out:
5804 if (!unmergeable)
5805 kfree(name);
5806 return err;
5807 out_del_kobj:
5808 kobject_del(&s->kobj);
5809 goto out;
5810 }
5811
5812 static void sysfs_slab_remove(struct kmem_cache *s)
5813 {
5814 if (slab_state < FULL)
5815 /*
5816  * Sysfs has not been setup yet so no need to remove the
5817  * cache from sysfs.
5818  */
5819 return;
5820
5821 kobject_get(&s->kobj);
5822 schedule_work(&s->kobj_remove_work);
5823 }
5824
5825 void sysfs_slab_unlink(struct kmem_cache *s)
5826 {
5827 if (slab_state >= FULL)
5828 kobject_del(&s->kobj);
5829 }
5830
5831 void sysfs_slab_release(struct kmem_cache *s)
5832 {
5833 if (slab_state >= FULL)
5834 kobject_put(&s->kobj);
5835 }
5836
5837 /*
5838  * Need to buffer aliases during bootup until sysfs becomes
5839  * available lest we lose that information.
5840  */
5841 struct saved_alias {
5842 struct kmem_cache *s;
5843 const char *name;
5844 struct saved_alias *next;
5845 };
5846
5847 static struct saved_alias *alias_list;
5848
5849 static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5850 {
5851 struct saved_alias *al;
5852
5853 if (slab_state == FULL) {
5854 /*
5855  * If we have a leftover link then remove it.
5856  */
5857 sysfs_remove_link(&slab_kset->kobj, name);
5858 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5859 }
5860
5861 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5862 if (!al)
5863 return -ENOMEM;
5864
5865 al->s = s;
5866 al->name = name;
5867 al->next = alias_list;
5868 alias_list = al;
5869 return 0;
5870 }
5871
5872 static int __init slab_sysfs_init(void)
5873 {
5874 struct kmem_cache *s;
5875 int err;
5876
5877 mutex_lock(&slab_mutex);
5878
5879 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5880 if (!slab_kset) {
5881 mutex_unlock(&slab_mutex);
5882 pr_err("Cannot register slab subsystem.\n");
5883 return -ENOSYS;
5884 }
5885
5886 slab_state = FULL;
5887
5888 list_for_each_entry(s, &slab_caches, list) {
5889 err = sysfs_slab_add(s);
5890 if (err)
5891 pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
5892 s->name);
5893 }
5894
5895 while (alias_list) {
5896 struct saved_alias *al = alias_list;
5897
5898 alias_list = alias_list->next;
5899 err = sysfs_slab_alias(al->s, al->name);
5900 if (err)
5901 pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
5902 al->name);
5903 kfree(al);
5904 }
5905
5906 mutex_unlock(&slab_mutex);
5907 resiliency_test();
5908 return 0;
5909 }
5910
5911 __initcall(slab_sysfs_init);
5912 #endif /* CONFIG_SYSFS */
5913
5914 /*
5915  * The /proc/slabinfo ABI
5916  */
5917 #ifdef CONFIG_SLUB_DEBUG
5918 void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
5919 {
5920 unsigned long nr_slabs = 0;
5921 unsigned long nr_objs = 0;
5922 unsigned long nr_free = 0;
5923 int node;
5924 struct kmem_cache_node *n;
5925
5926 for_each_kmem_cache_node(s, node, n) {
5927 nr_slabs += node_nr_slabs(n);
5928 nr_objs += node_nr_objs(n);
5929 nr_free += count_partial(n, count_free);
5930 }
5931
5932 sinfo->active_objs = nr_objs - nr_free;
5933 sinfo->num_objs = nr_objs;
5934 sinfo->active_slabs = nr_slabs;
5935 sinfo->num_slabs = nr_slabs;
5936 sinfo->objects_per_slab = oo_objects(s->oo);
5937 sinfo->cache_order = oo_order(s->oo);
5938 }
5939
5940 void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
5941 {
5942 }
5943
5944 ssize_t slabinfo_write(struct file *file, const char __user *buffer,
5945 size_t count, loff_t *ppos)
5946 {
5947 return -EIO;
5948 }
5949 #endif /* CONFIG_SLUB_DEBUG */