/*
 * Slab allocator functions that are independent of the allocator strategy
 *
 * (C) 2012 Christoph Lameter <cl@linux.com>
 */
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>

#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>

#include "slab.h"

enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

/*
 * Set of flags that will prevent slab merging
 */
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB)

#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | SLAB_NOTRACK)

/*
 * Merge control. If this is set then no merging of slab caches will occur.
 * (Could be removed. This was introduced to pacify the merge skeptics.)
 */
static int slab_nomerge;

static int __init setup_slab_nomerge(char *str)
{
	slab_nomerge = 1;
	return 1;
}

#ifdef CONFIG_SLUB
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
#endif

__setup("slab_nomerge", setup_slab_nomerge);

/*
 * Determine the size of a slab object
 */
unsigned int kmem_cache_size(struct kmem_cache *s)
{
	return s->object_size;
}
EXPORT_SYMBOL(kmem_cache_size);

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, size_t size)
{
	struct kmem_cache *s = NULL;

	if (!name || in_interrupt() || size < sizeof(void *) ||
		size > KMALLOC_MAX_SIZE) {
		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
		return -EINVAL;
	}

	list_for_each_entry(s, &slab_caches, list) {
		char tmp;
		int res;

		/*
		 * This happens when the module gets unloaded and doesn't
		 * destroy its slab cache and no-one else reuses the vmalloc
		 * area of the module.  Print a warning.
		 */
		res = probe_kernel_address(s->name, tmp);
		if (res) {
			pr_err("Slab cache with size %d has lost its name\n",
			       s->object_size);
			continue;
		}
	}

	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
	return 0;
}
#else
static inline int kmem_cache_sanity_check(const char *name, size_t size)
{
	return 0;
}
#endif

void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
	size_t i;

	for (i = 0; i < nr; i++)
		kmem_cache_free(s, p[i]);
}

int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
								void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		void *x = p[i] = kmem_cache_alloc(s, flags);
		if (!x) {
			__kmem_cache_free_bulk(s, i, p);
			return 0;
		}
	}
	return i;
}
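
/*
 * A minimal usage sketch of the public bulk API built on the helpers above
 * (hypothetical caller, cache pointer and array size are illustrative only):
 * the bulk allocation either fills every requested slot or none of them, so
 * the caller needs only a single check of the return value.
 *
 *	void *objs[16];
 *
 *	if (kmem_cache_alloc_bulk(cachep, GFP_KERNEL, ARRAY_SIZE(objs), objs)) {
 *		// ... use objs[0] .. objs[15] ...
 *		kmem_cache_free_bulk(cachep, ARRAY_SIZE(objs), objs);
 *	}
 */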

#ifdef CONFIG_MEMCG_KMEM
void slab_init_memcg_params(struct kmem_cache *s)
{
	s->memcg_params.is_root_cache = true;
	INIT_LIST_HEAD(&s->memcg_params.list);
	RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
}

static int init_memcg_params(struct kmem_cache *s,
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct memcg_cache_array *arr;

	if (memcg) {
		s->memcg_params.is_root_cache = false;
		s->memcg_params.memcg = memcg;
		s->memcg_params.root_cache = root_cache;
		return 0;
	}

	slab_init_memcg_params(s);

	if (!memcg_nr_cache_ids)
		return 0;

	arr = kzalloc(sizeof(struct memcg_cache_array) +
		      memcg_nr_cache_ids * sizeof(void *),
		      GFP_KERNEL);
	if (!arr)
		return -ENOMEM;

	RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
	return 0;
}

static void destroy_memcg_params(struct kmem_cache *s)
{
	if (is_root_cache(s))
		kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
}

static int update_memcg_params(struct kmem_cache *s, int new_array_size)
{
	struct memcg_cache_array *old, *new;

	if (!is_root_cache(s))
		return 0;

	new = kzalloc(sizeof(struct memcg_cache_array) +
		      new_array_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	old = rcu_dereference_protected(s->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));
	if (old)
		memcpy(new->entries, old->entries,
		       memcg_nr_cache_ids * sizeof(void *));

	rcu_assign_pointer(s->memcg_params.memcg_caches, new);
	if (old)
		kfree_rcu(old, rcu);
	return 0;
}

int memcg_update_all_caches(int num_memcgs)
{
	struct kmem_cache *s;
	int ret = 0;

	mutex_lock(&slab_mutex);
	list_for_each_entry(s, &slab_caches, list) {
		ret = update_memcg_params(s, num_memcgs);
		/*
		 * Instead of freeing the memory, we'll just leave the caches
		 * up to this point in an updated state.
		 */
		if (ret)
			break;
	}
	mutex_unlock(&slab_mutex);
	return ret;
}
#else
static inline int init_memcg_params(struct kmem_cache *s,
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	return 0;
}

static inline void destroy_memcg_params(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */

/*
 * Find a mergeable slab cache
 */
int slab_unmergeable(struct kmem_cache *s)
{
	if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
		return 1;

	if (!is_root_cache(s))
		return 1;

	if (s->ctor)
		return 1;

	/*
	 * We may have set a slab to be unmergeable during bootstrap.
	 */
	if (s->refcount < 0)
		return 1;

	return 0;
}

struct kmem_cache *find_mergeable(size_t size, size_t align,
		unsigned long flags, const char *name, void (*ctor)(void *))
{
	struct kmem_cache *s;

	if (slab_nomerge || (flags & SLAB_NEVER_MERGE))
		return NULL;

	if (ctor)
		return NULL;

	size = ALIGN(size, sizeof(void *));
	align = calculate_alignment(flags, align, size);
	size = ALIGN(size, align);
	flags = kmem_cache_flags(size, flags, name, NULL);

	list_for_each_entry_reverse(s, &slab_caches, list) {
		if (slab_unmergeable(s))
			continue;

		if (size > s->size)
			continue;

		if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
			continue;
		/*
		 * Check if alignment is compatible.
		 * Courtesy of Adrian Drzewiecki
		 */
		if ((s->size & ~(align - 1)) != s->size)
			continue;

		if (s->size - size >= sizeof(void *))
			continue;

		if (IS_ENABLED(CONFIG_SLAB) && align &&
			(align > s->align || s->align % align))
			continue;

		return s;
	}
	return NULL;
}
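
/*
 * Illustrative example of the merge criteria above (assumptions: 8-byte
 * minimum alignment, no constructor, no debug flags): a request for a
 * 60-byte cache is first rounded up to 64 bytes by the pointer-size and
 * alignment ALIGN() calls, so an existing 64-byte cache can satisfy it.
 * The existing cache is not smaller, its SLAB_MERGE_SAME bits match, 64 is
 * a multiple of the computed alignment, and the wasted space (64 - 64 = 0)
 * is below sizeof(void *), so that cache is returned instead of creating a
 * new one.
 */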

/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 */
unsigned long calculate_alignment(unsigned long flags,
		unsigned long align, unsigned long size)
{
	/*
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If the specified alignment is greater, use it.
	 */
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned long ralign = cache_line_size();
		while (size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}
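
/*
 * Worked example (assuming a 64-byte cache line and ARCH_SLAB_MINALIGN <= 8):
 * for a 20-byte object created with SLAB_HWCACHE_ALIGN and no explicit
 * alignment, ralign starts at 64 and is halved while the object still fits
 * in half of it: 20 <= 32 gives 32, but 20 > 16 stops the loop. The object
 * therefore ends up 32-byte aligned rather than wasting a full cache line
 * per object.
 */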

static struct kmem_cache *create_cache(const char *name,
		size_t object_size, size_t size, size_t align,
		unsigned long flags, void (*ctor)(void *),
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct kmem_cache *s;
	int err;

	err = -ENOMEM;
	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (!s)
		goto out;

	s->name = name;
	s->object_size = object_size;
	s->size = size;
	s->align = align;
	s->ctor = ctor;

	err = init_memcg_params(s, memcg, root_cache);
	if (err)
		goto out_free_cache;

	err = __kmem_cache_create(s, flags);
	if (err)
		goto out_free_cache;

	s->refcount = 1;
	list_add(&s->list, &slab_caches);
out:
	if (err)
		return ERR_PTR(err);
	return s;

out_free_cache:
	destroy_memcg_params(s);
	kmem_cache_free(kmem_cache, s);
	goto out;
}

/*
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Returns a pointer to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline.  This can be beneficial if you're counting cycles as closely
 * as davem.
 */
struct kmem_cache *
kmem_cache_create(const char *name, size_t size, size_t align,
		  unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s = NULL;
	const char *cache_name;
	int err;

	get_online_cpus();
	get_online_mems();
	memcg_get_cache_ids();

	mutex_lock(&slab_mutex);

	err = kmem_cache_sanity_check(name, size);
	if (err)
		goto out_unlock;

	/*
	 * Some allocators will constrain the set of valid flags to a subset
	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
	 * case, and we'll just provide them with a sanitized version of the
	 * passed flags.
	 */
	flags &= CACHE_CREATE_MASK;

	s = __kmem_cache_alias(name, size, align, flags, ctor);
	if (s)
		goto out_unlock;

	cache_name = kstrdup_const(name, GFP_KERNEL);
	if (!cache_name) {
		err = -ENOMEM;
		goto out_unlock;
	}

	s = create_cache(cache_name, size, size,
			 calculate_alignment(flags, align, size),
			 flags, ctor, NULL, NULL);
	if (IS_ERR(s)) {
		err = PTR_ERR(s);
		kfree_const(cache_name);
	}

out_unlock:
	mutex_unlock(&slab_mutex);

	memcg_put_cache_ids();
	put_online_mems();
	put_online_cpus();

	if (err) {
		if (flags & SLAB_PANIC)
			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
				name, err);
		else {
			printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d\n",
				name, err);
			dump_stack();
		}
		return NULL;
	}
	return s;
}
EXPORT_SYMBOL(kmem_cache_create);
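
/*
 * A minimal usage sketch (the struct, cache pointer and init function below
 * are hypothetical and not part of this file): a subsystem typically creates
 * its cache once at init time, then allocates and frees objects from it, and
 * destroys the cache when it is unloaded.
 *
 *	static struct kmem_cache *foo_cachep;
 *
 *	static int __init foo_init(void)
 *	{
 *		foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
 *					       SLAB_HWCACHE_ALIGN, NULL);
 *		if (!foo_cachep)
 *			return -ENOMEM;
 *		return 0;
 *	}
 *
 *	struct foo *f = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
 *	// ... use f ...
 *	kmem_cache_free(foo_cachep, f);
 *	kmem_cache_destroy(foo_cachep);
 */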

static int shutdown_cache(struct kmem_cache *s,
		struct list_head *release, bool *need_rcu_barrier)
{
	if (__kmem_cache_shutdown(s) != 0)
		return -EBUSY;

	if (s->flags & SLAB_DESTROY_BY_RCU)
		*need_rcu_barrier = true;

	list_move(&s->list, release);
	return 0;
}

static void release_caches(struct list_head *release, bool need_rcu_barrier)
{
	struct kmem_cache *s, *s2;

	if (need_rcu_barrier)
		rcu_barrier();

	list_for_each_entry_safe(s, s2, release, list) {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_remove(s);
#else
		slab_kmem_cache_release(s);
#endif
	}
}

#ifdef CONFIG_MEMCG_KMEM
/*
 * memcg_create_kmem_cache - Create a cache for a memory cgroup.
 * @memcg: The memory cgroup the new cache is for.
 * @root_cache: The parent of the new cache.
 *
 * This function attempts to create a kmem cache that will serve allocation
 * requests going from @memcg to @root_cache. The new cache inherits properties
 * from its parent.
 */
void memcg_create_kmem_cache(struct mem_cgroup *memcg,
			     struct kmem_cache *root_cache)
{
	static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
	struct cgroup_subsys_state *css = &memcg->css;
	struct memcg_cache_array *arr;
	struct kmem_cache *s = NULL;
	char *cache_name;
	int idx;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	/*
	 * The memory cgroup could have been deactivated while the cache
	 * creation work was pending.
	 */
	if (!memcg_kmem_is_active(memcg))
		goto out_unlock;

	idx = memcg_cache_id(memcg);
	arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));

	/*
	 * Since per-memcg caches are created asynchronously on first
	 * allocation (see memcg_kmem_get_cache()), several threads can try to
	 * create the same cache, but only one of them may succeed.
	 */
	if (arr->entries[idx])
		goto out_unlock;

	cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
	cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
			       css->id, memcg_name_buf);
	if (!cache_name)
		goto out_unlock;

	s = create_cache(cache_name, root_cache->object_size,
			 root_cache->size, root_cache->align,
			 root_cache->flags, root_cache->ctor,
			 memcg, root_cache);
	/*
	 * If we could not create a memcg cache, do not complain, because
	 * that's not critical at all as we can always proceed with the root
	 * cache.
	 */
	if (IS_ERR(s)) {
		kfree(cache_name);
		goto out_unlock;
	}

	list_add(&s->memcg_params.list, &root_cache->memcg_params.list);

	/*
	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
	 * barrier here to ensure nobody will see the kmem_cache partially
	 * initialized.
	 */
	smp_wmb();
	arr->entries[idx] = s;

out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
{
	int idx;
	struct memcg_cache_array *arr;
	struct kmem_cache *s, *c;

	idx = memcg_cache_id(memcg);

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
	list_for_each_entry(s, &slab_caches, list) {
		if (!is_root_cache(s))
			continue;

		arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
						lockdep_is_held(&slab_mutex));
		c = arr->entries[idx];
		if (!c)
			continue;

		__kmem_cache_shrink(c, true);
		arr->entries[idx] = NULL;
	}
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

static int __shutdown_memcg_cache(struct kmem_cache *s,
		struct list_head *release, bool *need_rcu_barrier)
{
	BUG_ON(is_root_cache(s));

	if (shutdown_cache(s, release, need_rcu_barrier))
		return -EBUSY;

	list_del(&s->memcg_params.list);
	return 0;
}

void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
{
	LIST_HEAD(release);
	bool need_rcu_barrier = false;
	struct kmem_cache *s, *s2;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
	list_for_each_entry_safe(s, s2, &slab_caches, list) {
		if (is_root_cache(s) || s->memcg_params.memcg != memcg)
			continue;
		/*
		 * The cgroup is about to be freed and therefore has no charges
		 * left. Hence, all its caches must be empty by now.
		 */
		BUG_ON(__shutdown_memcg_cache(s, &release, &need_rcu_barrier));
	}
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();

	release_caches(&release, need_rcu_barrier);
}

static int shutdown_memcg_caches(struct kmem_cache *s,
		struct list_head *release, bool *need_rcu_barrier)
{
	struct memcg_cache_array *arr;
	struct kmem_cache *c, *c2;
	LIST_HEAD(busy);
	int i;

	BUG_ON(!is_root_cache(s));

	/*
	 * First, shutdown active caches, i.e. caches that belong to online
	 * memory cgroups.
	 */
	arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));
	for_each_memcg_cache_index(i) {
		c = arr->entries[i];
		if (!c)
			continue;
		if (__shutdown_memcg_cache(c, release, need_rcu_barrier))
			/*
			 * The cache still has objects. Move it to a temporary
			 * list so as not to try to destroy it for a second
			 * time while iterating over inactive caches below.
			 */
			list_move(&c->memcg_params.list, &busy);
		else
			/*
			 * The cache is empty and will be destroyed soon. Clear
			 * the pointer to it in the memcg_caches array so that
			 * it will never be accessed even if the root cache
			 * stays alive.
			 */
			arr->entries[i] = NULL;
	}

	/*
	 * Second, shutdown all caches left from memory cgroups that are now
	 * offline.
	 */
	list_for_each_entry_safe(c, c2, &s->memcg_params.list,
				 memcg_params.list)
		__shutdown_memcg_cache(c, release, need_rcu_barrier);

	list_splice(&busy, &s->memcg_params.list);

	/*
	 * A cache being destroyed must be empty. In particular, this means
	 * that all per memcg caches attached to it must be empty too.
	 */
	if (!list_empty(&s->memcg_params.list))
		return -EBUSY;
	return 0;
}
#else
static inline int shutdown_memcg_caches(struct kmem_cache *s,
		struct list_head *release, bool *need_rcu_barrier)
{
	return 0;
}
#endif /* CONFIG_MEMCG_KMEM */

void slab_kmem_cache_release(struct kmem_cache *s)
{
	destroy_memcg_params(s);
	kfree_const(s->name);
	kmem_cache_free(kmem_cache, s);
}

void kmem_cache_destroy(struct kmem_cache *s)
{
	LIST_HEAD(release);
	bool need_rcu_barrier = false;
	int err;

	if (unlikely(!s))
		return;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	s->refcount--;
	if (s->refcount)
		goto out_unlock;

	err = shutdown_memcg_caches(s, &release, &need_rcu_barrier);
	if (!err)
		err = shutdown_cache(s, &release, &need_rcu_barrier);

	if (err) {
		pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
		       s->name);
		dump_stack();
	}
out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();

	release_caches(&release, need_rcu_barrier);
}
EXPORT_SYMBOL(kmem_cache_destroy);

/**
 * kmem_cache_shrink - Shrink a cache.
 * @cachep: The cache to shrink.
 *
 * Releases as many slabs as possible for a cache.
 * To help debugging, a zero exit status indicates all slabs were released.
 */
int kmem_cache_shrink(struct kmem_cache *cachep)
{
	int ret;

	get_online_cpus();
	get_online_mems();
	ret = __kmem_cache_shrink(cachep, false);
	put_online_mems();
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL(kmem_cache_shrink);

bool slab_is_available(void)
{
	return slab_state >= UP;
}

#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
		unsigned long flags)
{
	int err;

	s->name = name;
	s->size = s->object_size = size;
	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);

	slab_init_memcg_params(s);

	err = __kmem_cache_create(s, flags);

	if (err)
		panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
					name, size, err);

	s->refcount = -1;	/* Exempt from merging for now */
}

struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
				unsigned long flags)
{
	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

	if (!s)
		panic("Out of memory when creating slab %s\n", name);

	create_boot_cache(s, name, size, flags);
	list_add(&s->list, &slab_caches);
	s->refcount = 1;
	return s;
}

struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif

/*
 * Conversion table for small slab sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
 */
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};

static inline int size_index_elem(size_t bytes)
{
	return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
	int index;

	if (unlikely(size > KMALLOC_MAX_SIZE)) {
		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
		return NULL;
	}

	if (size <= 192) {
		if (!size)
			return ZERO_SIZE_PTR;

		index = size_index[size_index_elem(size)];
	} else
		index = fls(size - 1);

#ifdef CONFIG_ZONE_DMA
	if (unlikely((flags & GFP_DMA)))
		return kmalloc_dma_caches[index];
#endif
	return kmalloc_caches[index];
}
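
/*
 * Example of the size-to-cache mapping (assuming KMALLOC_MIN_SIZE == 8 and
 * the unmodified size_index table above): a 100-byte request takes the
 * small-size path, size_index_elem(100) == 12 and size_index[12] == 7, so
 * the kmalloc-128 cache is used; a 1000-byte request takes the fls() path,
 * fls(999) == 10, so the kmalloc-1024 cache is used.
 */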

/*
 * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
 * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
 * kmalloc-67108864.
 */
static struct {
	const char *name;
	unsigned long size;
} const kmalloc_info[] __initconst = {
	{NULL,                      0},		{"kmalloc-96",             96},
	{"kmalloc-192",           192},		{"kmalloc-8",               8},
	{"kmalloc-16",             16},		{"kmalloc-32",             32},
	{"kmalloc-64",             64},		{"kmalloc-128",           128},
	{"kmalloc-256",           256},		{"kmalloc-512",           512},
	{"kmalloc-1024",         1024},		{"kmalloc-2048",         2048},
	{"kmalloc-4096",         4096},		{"kmalloc-8192",         8192},
	{"kmalloc-16384",       16384},		{"kmalloc-32768",       32768},
	{"kmalloc-65536",       65536},		{"kmalloc-131072",     131072},
	{"kmalloc-262144",     262144},		{"kmalloc-524288",     524288},
	{"kmalloc-1048576",   1048576},		{"kmalloc-2097152",   2097152},
	{"kmalloc-4194304",   4194304},		{"kmalloc-8388608",   8388608},
	{"kmalloc-16777216", 16777216},		{"kmalloc-33554432", 33554432},
	{"kmalloc-67108864", 67108864}
};

/*
 * Patch up the size_index table if we have strange large alignment
 * requirements for the kmalloc array. This is only the case for
 * MIPS it seems. The standard arches will not generate any code here.
 *
 * Largest permitted alignment is 256 bytes due to the way we
 * handle the index determination for the smaller caches.
 *
 * Make sure that nothing crazy happens if someone starts tinkering
 * around with ARCH_KMALLOC_MINALIGN.
 */
void __init setup_kmalloc_cache_index_table(void)
{
	int i;

	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
		int elem = size_index_elem(i);

		if (elem >= ARRAY_SIZE(size_index))
			break;
		size_index[elem] = KMALLOC_SHIFT_LOW;
	}

	if (KMALLOC_MIN_SIZE >= 64) {
		/*
		 * The 96 byte size cache is not used if the alignment
		 * is 64 byte.
		 */
		for (i = 64 + 8; i <= 96; i += 8)
			size_index[size_index_elem(i)] = 7;
	}

	if (KMALLOC_MIN_SIZE >= 128) {
		/*
		 * The 192 byte sized cache is not used if the alignment
		 * is 128 byte. Redirect kmalloc to use the 256 byte cache
		 * instead.
		 */
		for (i = 128 + 8; i <= 192; i += 8)
			size_index[size_index_elem(i)] = 8;
	}
}
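
/*
 * Example of the effect (assuming KMALLOC_MIN_SIZE == 64, as on some
 * MIPS/ARM configurations): the first loop points every request up to 64
 * bytes at the 64-byte cache, and the 64..96 loop retires kmalloc-96, so a
 * 72-byte request is served from kmalloc-128 rather than from a 96-byte
 * cache that could not honour the 64-byte minimum alignment.
 */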

static void __init new_kmalloc_cache(int idx, unsigned long flags)
{
	kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
					kmalloc_info[idx].size, flags);
}

/*
 * Create the kmalloc array. Some of the regular kmalloc arrays
 * may already have been created because they were needed to
 * enable allocations for slab creation.
 */
void __init create_kmalloc_caches(unsigned long flags)
{
	int i;

	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
		if (!kmalloc_caches[i])
			new_kmalloc_cache(i, flags);

		/*
		 * Caches that are not of a power-of-two size. These have
		 * to be created immediately after the earlier power of two
		 * caches.
		 */
		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
			new_kmalloc_cache(1, flags);
		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
			new_kmalloc_cache(2, flags);
	}

	/* Kmalloc array is now usable */
	slab_state = UP;

#ifdef CONFIG_ZONE_DMA
	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[i];

		if (s) {
			int size = kmalloc_size(i);
			char *n = kasprintf(GFP_NOWAIT,
				 "dma-kmalloc-%d", size);

			BUG_ON(!n);
			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
				size, SLAB_CACHE_DMA | flags);
		}
	}
#endif
}
#endif /* !CONFIG_SLOB */

/*
 * To avoid unnecessary overhead, we pass through large allocation requests
 * directly to the page allocator. We use __GFP_COMP, because we will need to
 * know the allocation order to free the pages properly in kfree.
 */
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
	void *ret;
	struct page *page;

	flags |= __GFP_COMP;
	page = alloc_kmem_pages(flags, order);
	ret = page ? page_address(page) : NULL;
	kmemleak_alloc(ret, size, 1, flags);
	kasan_kmalloc_large(ret, size);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order);

#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
	void *ret = kmalloc_order(size, flags, order);
	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
#endif

#ifdef CONFIG_SLABINFO

#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
#else
#define SLABINFO_RIGHTS S_IRUSR
#endif

static void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
	 */
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
	seq_puts(m, "slabinfo - version: 2.1\n");
#endif
	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
		 "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}

void *slab_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&slab_mutex);
	return seq_list_start(&slab_caches, *pos);
}

void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &slab_caches, pos);
}

void slab_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}

static void
memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
{
	struct kmem_cache *c;
	struct slabinfo sinfo;

	if (!is_root_cache(s))
		return;

	for_each_memcg_cache(c, s) {
		memset(&sinfo, 0, sizeof(sinfo));
		get_slabinfo(c, &sinfo);

		info->active_slabs += sinfo.active_slabs;
		info->num_slabs += sinfo.num_slabs;
		info->shared_avail += sinfo.shared_avail;
		info->active_objs += sinfo.active_objs;
		info->num_objs += sinfo.num_objs;
	}
}

static void cache_show(struct kmem_cache *s, struct seq_file *m)
{
	struct slabinfo sinfo;

	memset(&sinfo, 0, sizeof(sinfo));
	get_slabinfo(s, &sinfo);

	memcg_accumulate_slabinfo(s, &sinfo);

	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
		   sinfo.objects_per_slab, (1 << sinfo.cache_order));

	seq_printf(m, " : tunables %4u %4u %4u",
		   sinfo.limit, sinfo.batchcount, sinfo.shared);
	seq_printf(m, " : slabdata %6lu %6lu %6lu",
		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
	slabinfo_show_stats(m, s);
	seq_putc(m, '\n');
}

static int slab_show(struct seq_file *m, void *p)
{
	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);

	if (p == slab_caches.next)
		print_slabinfo_header(m);
	if (is_root_cache(s))
		cache_show(s, m);
	return 0;
}

#ifdef CONFIG_MEMCG_KMEM
int memcg_slab_show(struct seq_file *m, void *p)
{
	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

	if (p == slab_caches.next)
		print_slabinfo_header(m);
	if (!is_root_cache(s) && s->memcg_params.memcg == memcg)
		cache_show(s, m);
	return 0;
}
#endif

/*
 * slabinfo_op - iterator that generates /proc/slabinfo
 *
 * Output layout:
 * cache-name
 * num-active-objs
 * total-objs
 * object size
 * num-active-slabs
 * total-slabs
 * num-pages-per-slab
 * + further values on SMP and with statistics enabled
 */
static const struct seq_operations slabinfo_op = {
	.start = slab_start,
	.next = slab_next,
	.stop = slab_stop,
	.show = slab_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
	.open		= slabinfo_open,
	.read		= seq_read,
	.write		= slabinfo_write,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init slab_proc_init(void)
{
	proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
						&proc_slabinfo_operations);
	return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */

static __always_inline void *__do_krealloc(const void *p, size_t new_size,
					   gfp_t flags)
{
	void *ret;
	size_t ks = 0;

	if (p)
		ks = ksize(p);

	if (ks >= new_size) {
		kasan_krealloc((void *)p, new_size);
		return (void *)p;
	}

	ret = kmalloc_track_caller(new_size, flags);
	if (ret && p)
		memcpy(ret, p, ks);

	return ret;
}

/**
 * __krealloc - like krealloc() but don't free @p.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * This function is like krealloc() except it never frees the originally
 * allocated buffer. Use this if you don't want to free the buffer immediately
 * like, for example, with RCU.
 */
void *__krealloc(const void *p, size_t new_size, gfp_t flags)
{
	if (unlikely(!new_size))
		return ZERO_SIZE_PTR;

	return __do_krealloc(p, new_size, flags);
}
EXPORT_SYMBOL(__krealloc);

/**
 * krealloc - reallocate memory. The contents will remain unchanged.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * The contents of the object pointed to are preserved up to the
 * lesser of the new and old sizes.  If @p is %NULL, krealloc()
 * behaves exactly like kmalloc().  If @new_size is 0 and @p is not a
 * %NULL pointer, the object pointed to is freed.
 */
void *krealloc(const void *p, size_t new_size, gfp_t flags)
{
	void *ret;

	if (unlikely(!new_size)) {
		kfree(p);
		return ZERO_SIZE_PTR;
	}

	ret = __do_krealloc(p, new_size, flags);
	if (ret && p != ret)
		kfree(p);

	return ret;
}
EXPORT_SYMBOL(krealloc);
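
/*
 * A minimal usage sketch (the buf/new variables are hypothetical): grow an
 * existing allocation, keeping in mind that krealloc() may return a new
 * pointer and frees the old buffer itself only on success. On failure it
 * returns NULL and leaves the original allocation untouched, so do not
 * overwrite the old pointer before checking the result.
 *
 *	new = krealloc(buf, new_len, GFP_KERNEL);
 *	if (!new)
 *		return -ENOMEM;	// buf is still valid and still owned
 *	buf = new;
 */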

/**
 * kzfree - like kfree but zero memory
 * @p: object to free memory of
 *
 * The memory of the object @p points to is zeroed before it is freed.
 * If @p is %NULL, kzfree() does nothing.
 *
 * Note: this function zeroes the whole allocated buffer which can be a good
 * deal bigger than the requested buffer size passed to kmalloc(). So be
 * careful when using this function in performance sensitive code.
 */
void kzfree(const void *p)
{
	size_t ks;
	void *mem = (void *)p;

	if (unlikely(ZERO_OR_NULL_PTR(mem)))
		return;
	ks = ksize(mem);
	memset(mem, 0, ks);
	kfree(mem);
}
EXPORT_SYMBOL(kzfree);

/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);