/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
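/*
 * Rough usage sketch (not part of the original file; "obj" is a hypothetical
 * cache object that embeds a struct list_head named "lru"):
 *
 *	struct list_lru my_lru;
 *
 *	__list_lru_init(&my_lru, false, NULL);
 *	list_lru_add(&my_lru, &obj->lru);	when the object becomes idle
 *	list_lru_del(&my_lru, &obj->lru);	when the object is reused
 *
 * A shrinker would pair list_lru_count_one() with list_lru_walk_one(),
 * calling list_lru_isolate() on each victim from its walk callback, and
 * finally tear everything down with list_lru_destroy().
 */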
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

#ifdef CONFIG_MEMCG_KMEM
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	/*
	 * The lock protects the array of per cgroup lists from relocation
	 * (see memcg_update_list_lru_node).
	 */
	lockdep_assert_held(&nlru->lock);
	if (nlru->memcg_lrus && idx >= 0)
		return nlru->memcg_lrus->lru[idx];

	return &nlru->lru;
}

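/*
 * Map a kmem-allocated object to the list it should live on: the list of the
 * memcg the object is charged to if this lru is memcg aware, otherwise the
 * global per-node list.
 */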
static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
	struct mem_cgroup *memcg;

	if (!nlru->memcg_lrus)
		return &nlru->lru;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		return &nlru->lru;

	return list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
}
#else
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

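/*
 * Add @item to the lru belonging to the node (and memcg, if any) that the
 * backing object was allocated from.  Returns true if the item was added,
 * false if it was already on a list.  The per-node spinlock serializes
 * concurrent add/del of the same item.
 */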
bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	l = list_lru_from_kmem(nlru, item);
	if (list_empty(item)) {
		list_add_tail(item, &l->list);
		l->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

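/*
 * Remove @item from its lru.  Returns true if the item was on a list and has
 * been removed, false otherwise.
 */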
bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	l = list_lru_from_kmem(nlru, item);
	if (!list_empty(item)) {
		list_del_init(item);
		l->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

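/*
 * The two isolate helpers are meant to be called from a walk callback, with
 * the lru lock already held, to take @item off the list currently being
 * walked (optionally moving it onto a caller-provided dispose list).
 */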
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

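/*
 * Counting takes the node lock so that nr_items is read consistently with
 * respect to concurrent add/del and to relocation of the per-memcg array.
 */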
static unsigned long __list_lru_count_one(struct list_lru *lru,
					  int nid, int memcg_idx)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	spin_lock(&nlru->lock);
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
	count = l->nr_items;
	spin_unlock(&nlru->lock);

	return count;
}

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	return __list_lru_count_one(lru, nid, memcg_cache_id(memcg));
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	long count = 0;
	int memcg_idx;

	count += __list_lru_count_one(lru, nid, -1);
	if (list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx)
			count += __list_lru_count_one(lru, nid, memcg_idx);
	}
	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

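/*
 * Walk one list, invoking @isolate on each item until @nr_to_walk is
 * exhausted.  The callback returns an lru_status: LRU_REMOVED(_RETRY) counts
 * the item as reclaimed, LRU_ROTATE moves it to the tail, LRU_SKIP leaves it
 * alone, and LRU_RETRY/LRU_REMOVED_RETRY signal that the lru lock was
 * temporarily dropped, so the traversal restarts from the list head.
 */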
static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	spin_lock(&nlru->lock);
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}

	spin_unlock(&nlru->lock);
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
				   isolate, cb_arg, nr_to_walk);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

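/*
 * Walk every list on @nid: first the global one, then, for memcg aware lrus,
 * each per-memcg list, stopping as soon as the walk budget is used up.
 */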
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
					nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			isolated += __list_lru_walk_one(lru, nid, memcg_idx,
						isolate, cb_arg, nr_to_walk);
			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

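/*
 * Everything from here to the matching #else manages the per-node arrays of
 * per-memcg lists: allocation at init time, resizing when the memcg cache id
 * space grows, draining when a memcg goes away, and teardown.
 */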
#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	/* free everything allocated so far, i.e. slots begin..i-1 */
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	int size = memcg_nr_cache_ids;

	nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL);
	if (!nlru->memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) {
		kfree(nlru->memcg_lrus);
		return -ENOMEM;
	}

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	__memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids);
	kfree(nlru->memcg_lrus);
}

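/*
 * Grow the per-memcg array of one node from @old_size to @new_size slots.
 * The new slots are populated first, the old contents are copied over, and
 * only then is the pointer switched under nlru->lock so that readers (see
 * list_lru_from_memcg_idx) never observe a half-initialized array.
 */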
static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = nlru->memcg_lrus;
	new = kmalloc(new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kfree(new);
		return -ENOMEM;
	}

	memcpy(new, old, old_size * sizeof(void *));

	/*
	 * The lock guarantees that we won't race with a reader
	 * (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	nlru->memcg_lrus = new;
	spin_unlock_irq(&nlru->lock);

	kfree(old);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	/*
	 * Do not bother shrinking the array back to the old size, because we
	 * cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(nlru->memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	for (i = 0; i < nr_node_ids; i++) {
		if (!memcg_aware)
			lru->node[i].memcg_lrus = NULL;
		else if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--)
		memcg_destroy_list_lru_node(&lru->node[i]);
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for (i = 0; i < nr_node_ids; i++)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for (i = 0; i < nr_node_ids; i++) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for (i = 0; i < nr_node_ids; i++)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

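/*
 * Resize the per-memcg arrays of every registered lru when the memcg cache
 * id space grows.  On failure, the lrus updated so far are rolled back so
 * that all of them keep a consistent (old) size.
 */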
int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

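/*
 * The three functions below splice the items charged to @src_idx onto the
 * list at @dst_idx, per node and then for every registered lru.  This is how
 * items are handed over (typically to the parent memcg) when a cgroup's
 * cache id is released.
 */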
static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
				      int src_idx, int dst_idx)
{
	struct list_lru_one *src, *dst;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	dst->nr_items += src->nr_items;
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, int dst_idx)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for (i = 0; i < nr_node_ids; i++)
		memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_idx);
}

void memcg_drain_all_list_lrus(int src_idx, int dst_idx)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_idx);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

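/*
 * Initialize an lru: allocate the per-node structures and, if requested, the
 * per-memcg lists.  memcg_get_cache_ids() pins memcg_nr_cache_ids for the
 * duration, so the arrays cannot be resized while they are being set up.
 */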
int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key)
{
	int i;
	size_t size = sizeof(*lru->node) * nr_node_ids;
	int err = -ENOMEM;

	memcg_get_cache_ids();

	lru->node = kzalloc(size, GFP_KERNEL);
	if (!lru->node)
		goto out;

	for (i = 0; i < nr_node_ids; i++) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);