1/* 2 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. 3 * Authors: David Chinner and Glauber Costa 4 * 5 * Generic LRU infrastructure 6 */ 7#include <linux/kernel.h> 8#include <linux/module.h> 9#include <linux/mm.h> 10#include <linux/list_lru.h> 11#include <linux/slab.h> 12#include <linux/mutex.h> 13#include <linux/memcontrol.h> 14 15#ifdef CONFIG_MEMCG_KMEM 16static LIST_HEAD(list_lrus); 17static DEFINE_MUTEX(list_lrus_mutex); 18 19static void list_lru_register(struct list_lru *lru) 20{ 21 mutex_lock(&list_lrus_mutex); 22 list_add(&lru->list, &list_lrus); 23 mutex_unlock(&list_lrus_mutex); 24} 25 26static void list_lru_unregister(struct list_lru *lru) 27{ 28 mutex_lock(&list_lrus_mutex); 29 list_del(&lru->list); 30 mutex_unlock(&list_lrus_mutex); 31} 32#else 33static void list_lru_register(struct list_lru *lru) 34{ 35} 36 37static void list_lru_unregister(struct list_lru *lru) 38{ 39} 40#endif /* CONFIG_MEMCG_KMEM */ 41 42#ifdef CONFIG_MEMCG_KMEM 43static inline bool list_lru_memcg_aware(struct list_lru *lru) 44{ 45 return !!lru->node[0].memcg_lrus; 46} 47 48static inline struct list_lru_one * 49list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx) 50{ 51 /* 52 * The lock protects the array of per cgroup lists from relocation 53 * (see memcg_update_list_lru_node). 54 */ 55 lockdep_assert_held(&nlru->lock); 56 if (nlru->memcg_lrus && idx >= 0) 57 return nlru->memcg_lrus->lru[idx]; 58 59 return &nlru->lru; 60} 61 62static inline struct list_lru_one * 63list_lru_from_kmem(struct list_lru_node *nlru, void *ptr) 64{ 65 struct mem_cgroup *memcg; 66 67 if (!nlru->memcg_lrus) 68 return &nlru->lru; 69 70 memcg = mem_cgroup_from_kmem(ptr); 71 if (!memcg) 72 return &nlru->lru; 73 74 return list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg)); 75} 76#else 77static inline bool list_lru_memcg_aware(struct list_lru *lru) 78{ 79 return false; 80} 81 82static inline struct list_lru_one * 83list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx) 84{ 85 return &nlru->lru; 86} 87 88static inline struct list_lru_one * 89list_lru_from_kmem(struct list_lru_node *nlru, void *ptr) 90{ 91 return &nlru->lru; 92} 93#endif /* CONFIG_MEMCG_KMEM */ 94 95bool list_lru_add(struct list_lru *lru, struct list_head *item) 96{ 97 int nid = page_to_nid(virt_to_page(item)); 98 struct list_lru_node *nlru = &lru->node[nid]; 99 struct list_lru_one *l; 100 101 spin_lock(&nlru->lock); 102 l = list_lru_from_kmem(nlru, item); 103 if (list_empty(item)) { 104 list_add_tail(item, &l->list); 105 l->nr_items++; 106 spin_unlock(&nlru->lock); 107 return true; 108 } 109 spin_unlock(&nlru->lock); 110 return false; 111} 112EXPORT_SYMBOL_GPL(list_lru_add); 113 114bool list_lru_del(struct list_lru *lru, struct list_head *item) 115{ 116 int nid = page_to_nid(virt_to_page(item)); 117 struct list_lru_node *nlru = &lru->node[nid]; 118 struct list_lru_one *l; 119 120 spin_lock(&nlru->lock); 121 l = list_lru_from_kmem(nlru, item); 122 if (!list_empty(item)) { 123 list_del_init(item); 124 l->nr_items--; 125 spin_unlock(&nlru->lock); 126 return true; 127 } 128 spin_unlock(&nlru->lock); 129 return false; 130} 131EXPORT_SYMBOL_GPL(list_lru_del); 132 133void list_lru_isolate(struct list_lru_one *list, struct list_head *item) 134{ 135 list_del_init(item); 136 list->nr_items--; 137} 138EXPORT_SYMBOL_GPL(list_lru_isolate); 139 140void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item, 141 struct list_head *head) 142{ 143 list_move(item, head); 144 list->nr_items--; 145} 146EXPORT_SYMBOL_GPL(list_lru_isolate_move); 147 148static unsigned long __list_lru_count_one(struct list_lru *lru, 149 int nid, int memcg_idx) 150{ 151 struct list_lru_node *nlru = &lru->node[nid]; 152 struct list_lru_one *l; 153 unsigned long count; 154 155 spin_lock(&nlru->lock); 156 l = list_lru_from_memcg_idx(nlru, memcg_idx); 157 count = l->nr_items; 158 spin_unlock(&nlru->lock); 159 160 return count; 161} 162 163unsigned long list_lru_count_one(struct list_lru *lru, 164 int nid, struct mem_cgroup *memcg) 165{ 166 return __list_lru_count_one(lru, nid, memcg_cache_id(memcg)); 167} 168EXPORT_SYMBOL_GPL(list_lru_count_one); 169 170unsigned long list_lru_count_node(struct list_lru *lru, int nid) 171{ 172 long count = 0; 173 int memcg_idx; 174 175 count += __list_lru_count_one(lru, nid, -1); 176 if (list_lru_memcg_aware(lru)) { 177 for_each_memcg_cache_index(memcg_idx) 178 count += __list_lru_count_one(lru, nid, memcg_idx); 179 } 180 return count; 181} 182EXPORT_SYMBOL_GPL(list_lru_count_node); 183 184static unsigned long 185__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx, 186 list_lru_walk_cb isolate, void *cb_arg, 187 unsigned long *nr_to_walk) 188{ 189 190 struct list_lru_node *nlru = &lru->node[nid]; 191 struct list_lru_one *l; 192 struct list_head *item, *n; 193 unsigned long isolated = 0; 194 195 spin_lock(&nlru->lock); 196 l = list_lru_from_memcg_idx(nlru, memcg_idx); 197restart: 198 list_for_each_safe(item, n, &l->list) { 199 enum lru_status ret; 200 201 /* 202 * decrement nr_to_walk first so that we don't livelock if we 203 * get stuck on large numbesr of LRU_RETRY items 204 */ 205 if (!*nr_to_walk) 206 break; 207 --*nr_to_walk; 208 209 ret = isolate(item, l, &nlru->lock, cb_arg); 210 switch (ret) { 211 case LRU_REMOVED_RETRY: 212 assert_spin_locked(&nlru->lock); 213 case LRU_REMOVED: 214 isolated++; 215 /* 216 * If the lru lock has been dropped, our list 217 * traversal is now invalid and so we have to 218 * restart from scratch. 219 */ 220 if (ret == LRU_REMOVED_RETRY) 221 goto restart; 222 break; 223 case LRU_ROTATE: 224 list_move_tail(item, &l->list); 225 break; 226 case LRU_SKIP: 227 break; 228 case LRU_RETRY: 229 /* 230 * The lru lock has been dropped, our list traversal is 231 * now invalid and so we have to restart from scratch. 232 */ 233 assert_spin_locked(&nlru->lock); 234 goto restart; 235 default: 236 BUG(); 237 } 238 } 239 240 spin_unlock(&nlru->lock); 241 return isolated; 242} 243 244unsigned long 245list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, 246 list_lru_walk_cb isolate, void *cb_arg, 247 unsigned long *nr_to_walk) 248{ 249 return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg), 250 isolate, cb_arg, nr_to_walk); 251} 252EXPORT_SYMBOL_GPL(list_lru_walk_one); 253 254unsigned long list_lru_walk_node(struct list_lru *lru, int nid, 255 list_lru_walk_cb isolate, void *cb_arg, 256 unsigned long *nr_to_walk) 257{ 258 long isolated = 0; 259 int memcg_idx; 260 261 isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg, 262 nr_to_walk); 263 if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) { 264 for_each_memcg_cache_index(memcg_idx) { 265 isolated += __list_lru_walk_one(lru, nid, memcg_idx, 266 isolate, cb_arg, nr_to_walk); 267 if (*nr_to_walk <= 0) 268 break; 269 } 270 } 271 return isolated; 272} 273EXPORT_SYMBOL_GPL(list_lru_walk_node); 274 275static void init_one_lru(struct list_lru_one *l) 276{ 277 INIT_LIST_HEAD(&l->list); 278 l->nr_items = 0; 279} 280 281#ifdef CONFIG_MEMCG_KMEM 282static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus, 283 int begin, int end) 284{ 285 int i; 286 287 for (i = begin; i < end; i++) 288 kfree(memcg_lrus->lru[i]); 289} 290 291static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus, 292 int begin, int end) 293{ 294 int i; 295 296 for (i = begin; i < end; i++) { 297 struct list_lru_one *l; 298 299 l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL); 300 if (!l) 301 goto fail; 302 303 init_one_lru(l); 304 memcg_lrus->lru[i] = l; 305 } 306 return 0; 307fail: 308 __memcg_destroy_list_lru_node(memcg_lrus, begin, i - 1); 309 return -ENOMEM; 310} 311 312static int memcg_init_list_lru_node(struct list_lru_node *nlru) 313{ 314 int size = memcg_nr_cache_ids; 315 316 nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL); 317 if (!nlru->memcg_lrus) 318 return -ENOMEM; 319 320 if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) { 321 kfree(nlru->memcg_lrus); 322 return -ENOMEM; 323 } 324 325 return 0; 326} 327 328static void memcg_destroy_list_lru_node(struct list_lru_node *nlru) 329{ 330 __memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids); 331 kfree(nlru->memcg_lrus); 332} 333 334static int memcg_update_list_lru_node(struct list_lru_node *nlru, 335 int old_size, int new_size) 336{ 337 struct list_lru_memcg *old, *new; 338 339 BUG_ON(old_size > new_size); 340 341 old = nlru->memcg_lrus; 342 new = kmalloc(new_size * sizeof(void *), GFP_KERNEL); 343 if (!new) 344 return -ENOMEM; 345 346 if (__memcg_init_list_lru_node(new, old_size, new_size)) { 347 kfree(new); 348 return -ENOMEM; 349 } 350 351 memcpy(new, old, old_size * sizeof(void *)); 352 353 /* 354 * The lock guarantees that we won't race with a reader 355 * (see list_lru_from_memcg_idx). 356 * 357 * Since list_lru_{add,del} may be called under an IRQ-safe lock, 358 * we have to use IRQ-safe primitives here to avoid deadlock. 359 */ 360 spin_lock_irq(&nlru->lock); 361 nlru->memcg_lrus = new; 362 spin_unlock_irq(&nlru->lock); 363 364 kfree(old); 365 return 0; 366} 367 368static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru, 369 int old_size, int new_size) 370{ 371 /* do not bother shrinking the array back to the old size, because we 372 * cannot handle allocation failures here */ 373 __memcg_destroy_list_lru_node(nlru->memcg_lrus, old_size, new_size); 374} 375 376static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) 377{ 378 int i; 379 380 for (i = 0; i < nr_node_ids; i++) { 381 if (!memcg_aware) 382 lru->node[i].memcg_lrus = NULL; 383 else if (memcg_init_list_lru_node(&lru->node[i])) 384 goto fail; 385 } 386 return 0; 387fail: 388 for (i = i - 1; i >= 0; i--) 389 memcg_destroy_list_lru_node(&lru->node[i]); 390 return -ENOMEM; 391} 392 393static void memcg_destroy_list_lru(struct list_lru *lru) 394{ 395 int i; 396 397 if (!list_lru_memcg_aware(lru)) 398 return; 399 400 for (i = 0; i < nr_node_ids; i++) 401 memcg_destroy_list_lru_node(&lru->node[i]); 402} 403 404static int memcg_update_list_lru(struct list_lru *lru, 405 int old_size, int new_size) 406{ 407 int i; 408 409 if (!list_lru_memcg_aware(lru)) 410 return 0; 411 412 for (i = 0; i < nr_node_ids; i++) { 413 if (memcg_update_list_lru_node(&lru->node[i], 414 old_size, new_size)) 415 goto fail; 416 } 417 return 0; 418fail: 419 for (i = i - 1; i >= 0; i--) 420 memcg_cancel_update_list_lru_node(&lru->node[i], 421 old_size, new_size); 422 return -ENOMEM; 423} 424 425static void memcg_cancel_update_list_lru(struct list_lru *lru, 426 int old_size, int new_size) 427{ 428 int i; 429 430 if (!list_lru_memcg_aware(lru)) 431 return; 432 433 for (i = 0; i < nr_node_ids; i++) 434 memcg_cancel_update_list_lru_node(&lru->node[i], 435 old_size, new_size); 436} 437 438int memcg_update_all_list_lrus(int new_size) 439{ 440 int ret = 0; 441 struct list_lru *lru; 442 int old_size = memcg_nr_cache_ids; 443 444 mutex_lock(&list_lrus_mutex); 445 list_for_each_entry(lru, &list_lrus, list) { 446 ret = memcg_update_list_lru(lru, old_size, new_size); 447 if (ret) 448 goto fail; 449 } 450out: 451 mutex_unlock(&list_lrus_mutex); 452 return ret; 453fail: 454 list_for_each_entry_continue_reverse(lru, &list_lrus, list) 455 memcg_cancel_update_list_lru(lru, old_size, new_size); 456 goto out; 457} 458 459static void memcg_drain_list_lru_node(struct list_lru_node *nlru, 460 int src_idx, int dst_idx) 461{ 462 struct list_lru_one *src, *dst; 463 464 /* 465 * Since list_lru_{add,del} may be called under an IRQ-safe lock, 466 * we have to use IRQ-safe primitives here to avoid deadlock. 467 */ 468 spin_lock_irq(&nlru->lock); 469 470 src = list_lru_from_memcg_idx(nlru, src_idx); 471 dst = list_lru_from_memcg_idx(nlru, dst_idx); 472 473 list_splice_init(&src->list, &dst->list); 474 dst->nr_items += src->nr_items; 475 src->nr_items = 0; 476 477 spin_unlock_irq(&nlru->lock); 478} 479 480static void memcg_drain_list_lru(struct list_lru *lru, 481 int src_idx, int dst_idx) 482{ 483 int i; 484 485 if (!list_lru_memcg_aware(lru)) 486 return; 487 488 for (i = 0; i < nr_node_ids; i++) 489 memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_idx); 490} 491 492void memcg_drain_all_list_lrus(int src_idx, int dst_idx) 493{ 494 struct list_lru *lru; 495 496 mutex_lock(&list_lrus_mutex); 497 list_for_each_entry(lru, &list_lrus, list) 498 memcg_drain_list_lru(lru, src_idx, dst_idx); 499 mutex_unlock(&list_lrus_mutex); 500} 501#else 502static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) 503{ 504 return 0; 505} 506 507static void memcg_destroy_list_lru(struct list_lru *lru) 508{ 509} 510#endif /* CONFIG_MEMCG_KMEM */ 511 512int __list_lru_init(struct list_lru *lru, bool memcg_aware, 513 struct lock_class_key *key) 514{ 515 int i; 516 size_t size = sizeof(*lru->node) * nr_node_ids; 517 int err = -ENOMEM; 518 519 memcg_get_cache_ids(); 520 521 lru->node = kzalloc(size, GFP_KERNEL); 522 if (!lru->node) 523 goto out; 524 525 for (i = 0; i < nr_node_ids; i++) { 526 spin_lock_init(&lru->node[i].lock); 527 if (key) 528 lockdep_set_class(&lru->node[i].lock, key); 529 init_one_lru(&lru->node[i].lru); 530 } 531 532 err = memcg_init_list_lru(lru, memcg_aware); 533 if (err) { 534 kfree(lru->node); 535 goto out; 536 } 537 538 list_lru_register(lru); 539out: 540 memcg_put_cache_ids(); 541 return err; 542} 543EXPORT_SYMBOL_GPL(__list_lru_init); 544 545void list_lru_destroy(struct list_lru *lru) 546{ 547 /* Already destroyed or not yet initialized? */ 548 if (!lru->node) 549 return; 550 551 memcg_get_cache_ids(); 552 553 list_lru_unregister(lru); 554 555 memcg_destroy_list_lru(lru); 556 kfree(lru->node); 557 lru->node = NULL; 558 559 memcg_put_cache_ids(); 560} 561EXPORT_SYMBOL_GPL(list_lru_destroy); 562