This source file includes following definitions.
- get_next_cpu
- local_free_list
- local_pending_list
- bpf_lru_node_is_ref
- bpf_lru_list_count_inc
- bpf_lru_list_count_dec
- __bpf_lru_node_move_to_free
- __bpf_lru_node_move_in
- __bpf_lru_node_move
- bpf_lru_list_inactive_low
- __bpf_lru_list_rotate_active
- __bpf_lru_list_rotate_inactive
- __bpf_lru_list_shrink_inactive
- __bpf_lru_list_rotate
- __bpf_lru_list_shrink
- __local_list_flush
- bpf_lru_list_push_free
- bpf_lru_list_pop_free_to_local
- __local_list_add_pending
- __local_list_pop_free
- __local_list_pop_pending
- bpf_percpu_lru_pop_free
- bpf_common_lru_pop_free
- bpf_lru_pop_free
- bpf_common_lru_push_free
- bpf_percpu_lru_push_free
- bpf_lru_push_free
- bpf_common_lru_populate
- bpf_percpu_lru_populate
- bpf_lru_populate
- bpf_lru_locallist_init
- bpf_lru_list_init
- bpf_lru_init
- bpf_lru_destroy
1
2
3
4 #include <linux/cpumask.h>
5 #include <linux/spinlock.h>
6 #include <linux/percpu.h>
7
8 #include "bpf_lru_list.h"
9
10 #define LOCAL_FREE_TARGET (128)
11 #define LOCAL_NR_SCANS LOCAL_FREE_TARGET
12
13 #define PERCPU_FREE_TARGET (4)
14 #define PERCPU_NR_SCANS PERCPU_FREE_TARGET
15
16
17 #define LOCAL_LIST_IDX(t) ((t) - BPF_LOCAL_LIST_T_OFFSET)
18 #define LOCAL_FREE_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_FREE)
19 #define LOCAL_PENDING_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING)
20 #define IS_LOCAL_LIST_TYPE(t) ((t) >= BPF_LOCAL_LIST_T_OFFSET)
21
22 static int get_next_cpu(int cpu)
23 {
24 cpu = cpumask_next(cpu, cpu_possible_mask);
25 if (cpu >= nr_cpu_ids)
26 cpu = cpumask_first(cpu_possible_mask);
27 return cpu;
28 }
29
30
31 static struct list_head *local_free_list(struct bpf_lru_locallist *loc_l)
32 {
33 return &loc_l->lists[LOCAL_FREE_LIST_IDX];
34 }
35
36 static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l)
37 {
38 return &loc_l->lists[LOCAL_PENDING_LIST_IDX];
39 }
40
41
42 static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node)
43 {
44 return node->ref;
45 }
46
47 static void bpf_lru_list_count_inc(struct bpf_lru_list *l,
48 enum bpf_lru_list_type type)
49 {
50 if (type < NR_BPF_LRU_LIST_COUNT)
51 l->counts[type]++;
52 }
53
54 static void bpf_lru_list_count_dec(struct bpf_lru_list *l,
55 enum bpf_lru_list_type type)
56 {
57 if (type < NR_BPF_LRU_LIST_COUNT)
58 l->counts[type]--;
59 }
60
61 static void __bpf_lru_node_move_to_free(struct bpf_lru_list *l,
62 struct bpf_lru_node *node,
63 struct list_head *free_list,
64 enum bpf_lru_list_type tgt_free_type)
65 {
66 if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)))
67 return;
68
69
70
71
72 if (&node->list == l->next_inactive_rotation)
73 l->next_inactive_rotation = l->next_inactive_rotation->prev;
74
75 bpf_lru_list_count_dec(l, node->type);
76
77 node->type = tgt_free_type;
78 list_move(&node->list, free_list);
79 }
80
81
82 static void __bpf_lru_node_move_in(struct bpf_lru_list *l,
83 struct bpf_lru_node *node,
84 enum bpf_lru_list_type tgt_type)
85 {
86 if (WARN_ON_ONCE(!IS_LOCAL_LIST_TYPE(node->type)) ||
87 WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type)))
88 return;
89
90 bpf_lru_list_count_inc(l, tgt_type);
91 node->type = tgt_type;
92 node->ref = 0;
93 list_move(&node->list, &l->lists[tgt_type]);
94 }
95
96
97
98
99
100 static void __bpf_lru_node_move(struct bpf_lru_list *l,
101 struct bpf_lru_node *node,
102 enum bpf_lru_list_type tgt_type)
103 {
104 if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)) ||
105 WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type)))
106 return;
107
108 if (node->type != tgt_type) {
109 bpf_lru_list_count_dec(l, node->type);
110 bpf_lru_list_count_inc(l, tgt_type);
111 node->type = tgt_type;
112 }
113 node->ref = 0;
114
115
116
117
118 if (&node->list == l->next_inactive_rotation)
119 l->next_inactive_rotation = l->next_inactive_rotation->prev;
120
121 list_move(&node->list, &l->lists[tgt_type]);
122 }
123
124 static bool bpf_lru_list_inactive_low(const struct bpf_lru_list *l)
125 {
126 return l->counts[BPF_LRU_LIST_T_INACTIVE] <
127 l->counts[BPF_LRU_LIST_T_ACTIVE];
128 }
129
130
131
132
133
134
135
136
137
138
139 static void __bpf_lru_list_rotate_active(struct bpf_lru *lru,
140 struct bpf_lru_list *l)
141 {
142 struct list_head *active = &l->lists[BPF_LRU_LIST_T_ACTIVE];
143 struct bpf_lru_node *node, *tmp_node, *first_node;
144 unsigned int i = 0;
145
146 first_node = list_first_entry(active, struct bpf_lru_node, list);
147 list_for_each_entry_safe_reverse(node, tmp_node, active, list) {
148 if (bpf_lru_node_is_ref(node))
149 __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE);
150 else
151 __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
152
153 if (++i == lru->nr_scans || node == first_node)
154 break;
155 }
156 }
157
158
159
160
161
162
163
164
165
166 static void __bpf_lru_list_rotate_inactive(struct bpf_lru *lru,
167 struct bpf_lru_list *l)
168 {
169 struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE];
170 struct list_head *cur, *last, *next = inactive;
171 struct bpf_lru_node *node;
172 unsigned int i = 0;
173
174 if (list_empty(inactive))
175 return;
176
177 last = l->next_inactive_rotation->next;
178 if (last == inactive)
179 last = last->next;
180
181 cur = l->next_inactive_rotation;
182 while (i < lru->nr_scans) {
183 if (cur == inactive) {
184 cur = cur->prev;
185 continue;
186 }
187
188 node = list_entry(cur, struct bpf_lru_node, list);
189 next = cur->prev;
190 if (bpf_lru_node_is_ref(node))
191 __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE);
192 if (cur == last)
193 break;
194 cur = next;
195 i++;
196 }
197
198 l->next_inactive_rotation = next;
199 }
200
201
202
203
204
205 static unsigned int
206 __bpf_lru_list_shrink_inactive(struct bpf_lru *lru,
207 struct bpf_lru_list *l,
208 unsigned int tgt_nshrink,
209 struct list_head *free_list,
210 enum bpf_lru_list_type tgt_free_type)
211 {
212 struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE];
213 struct bpf_lru_node *node, *tmp_node;
214 unsigned int nshrinked = 0;
215 unsigned int i = 0;
216
217 list_for_each_entry_safe_reverse(node, tmp_node, inactive, list) {
218 if (bpf_lru_node_is_ref(node)) {
219 __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE);
220 } else if (lru->del_from_htab(lru->del_arg, node)) {
221 __bpf_lru_node_move_to_free(l, node, free_list,
222 tgt_free_type);
223 if (++nshrinked == tgt_nshrink)
224 break;
225 }
226
227 if (++i == lru->nr_scans)
228 break;
229 }
230
231 return nshrinked;
232 }
233
234
235
236
237 static void __bpf_lru_list_rotate(struct bpf_lru *lru, struct bpf_lru_list *l)
238 {
239 if (bpf_lru_list_inactive_low(l))
240 __bpf_lru_list_rotate_active(lru, l);
241
242 __bpf_lru_list_rotate_inactive(lru, l);
243 }
244
245
246
247
248
249
250
251
252
253
254
255 static unsigned int __bpf_lru_list_shrink(struct bpf_lru *lru,
256 struct bpf_lru_list *l,
257 unsigned int tgt_nshrink,
258 struct list_head *free_list,
259 enum bpf_lru_list_type tgt_free_type)
260
261 {
262 struct bpf_lru_node *node, *tmp_node;
263 struct list_head *force_shrink_list;
264 unsigned int nshrinked;
265
266 nshrinked = __bpf_lru_list_shrink_inactive(lru, l, tgt_nshrink,
267 free_list, tgt_free_type);
268 if (nshrinked)
269 return nshrinked;
270
271
272 if (!list_empty(&l->lists[BPF_LRU_LIST_T_INACTIVE]))
273 force_shrink_list = &l->lists[BPF_LRU_LIST_T_INACTIVE];
274 else
275 force_shrink_list = &l->lists[BPF_LRU_LIST_T_ACTIVE];
276
277 list_for_each_entry_safe_reverse(node, tmp_node, force_shrink_list,
278 list) {
279 if (lru->del_from_htab(lru->del_arg, node)) {
280 __bpf_lru_node_move_to_free(l, node, free_list,
281 tgt_free_type);
282 return 1;
283 }
284 }
285
286 return 0;
287 }
288
289
290 static void __local_list_flush(struct bpf_lru_list *l,
291 struct bpf_lru_locallist *loc_l)
292 {
293 struct bpf_lru_node *node, *tmp_node;
294
295 list_for_each_entry_safe_reverse(node, tmp_node,
296 local_pending_list(loc_l), list) {
297 if (bpf_lru_node_is_ref(node))
298 __bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_ACTIVE);
299 else
300 __bpf_lru_node_move_in(l, node,
301 BPF_LRU_LIST_T_INACTIVE);
302 }
303 }
304
305 static void bpf_lru_list_push_free(struct bpf_lru_list *l,
306 struct bpf_lru_node *node)
307 {
308 unsigned long flags;
309
310 if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)))
311 return;
312
313 raw_spin_lock_irqsave(&l->lock, flags);
314 __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE);
315 raw_spin_unlock_irqrestore(&l->lock, flags);
316 }
317
318 static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru,
319 struct bpf_lru_locallist *loc_l)
320 {
321 struct bpf_lru_list *l = &lru->common_lru.lru_list;
322 struct bpf_lru_node *node, *tmp_node;
323 unsigned int nfree = 0;
324
325 raw_spin_lock(&l->lock);
326
327 __local_list_flush(l, loc_l);
328
329 __bpf_lru_list_rotate(lru, l);
330
331 list_for_each_entry_safe(node, tmp_node, &l->lists[BPF_LRU_LIST_T_FREE],
332 list) {
333 __bpf_lru_node_move_to_free(l, node, local_free_list(loc_l),
334 BPF_LRU_LOCAL_LIST_T_FREE);
335 if (++nfree == LOCAL_FREE_TARGET)
336 break;
337 }
338
339 if (nfree < LOCAL_FREE_TARGET)
340 __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree,
341 local_free_list(loc_l),
342 BPF_LRU_LOCAL_LIST_T_FREE);
343
344 raw_spin_unlock(&l->lock);
345 }
346
347 static void __local_list_add_pending(struct bpf_lru *lru,
348 struct bpf_lru_locallist *loc_l,
349 int cpu,
350 struct bpf_lru_node *node,
351 u32 hash)
352 {
353 *(u32 *)((void *)node + lru->hash_offset) = hash;
354 node->cpu = cpu;
355 node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
356 node->ref = 0;
357 list_add(&node->list, local_pending_list(loc_l));
358 }
359
360 static struct bpf_lru_node *
361 __local_list_pop_free(struct bpf_lru_locallist *loc_l)
362 {
363 struct bpf_lru_node *node;
364
365 node = list_first_entry_or_null(local_free_list(loc_l),
366 struct bpf_lru_node,
367 list);
368 if (node)
369 list_del(&node->list);
370
371 return node;
372 }
373
374 static struct bpf_lru_node *
375 __local_list_pop_pending(struct bpf_lru *lru, struct bpf_lru_locallist *loc_l)
376 {
377 struct bpf_lru_node *node;
378 bool force = false;
379
380 ignore_ref:
381
382 list_for_each_entry_reverse(node, local_pending_list(loc_l),
383 list) {
384 if ((!bpf_lru_node_is_ref(node) || force) &&
385 lru->del_from_htab(lru->del_arg, node)) {
386 list_del(&node->list);
387 return node;
388 }
389 }
390
391 if (!force) {
392 force = true;
393 goto ignore_ref;
394 }
395
396 return NULL;
397 }
398
399 static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
400 u32 hash)
401 {
402 struct list_head *free_list;
403 struct bpf_lru_node *node = NULL;
404 struct bpf_lru_list *l;
405 unsigned long flags;
406 int cpu = raw_smp_processor_id();
407
408 l = per_cpu_ptr(lru->percpu_lru, cpu);
409
410 raw_spin_lock_irqsave(&l->lock, flags);
411
412 __bpf_lru_list_rotate(lru, l);
413
414 free_list = &l->lists[BPF_LRU_LIST_T_FREE];
415 if (list_empty(free_list))
416 __bpf_lru_list_shrink(lru, l, PERCPU_FREE_TARGET, free_list,
417 BPF_LRU_LIST_T_FREE);
418
419 if (!list_empty(free_list)) {
420 node = list_first_entry(free_list, struct bpf_lru_node, list);
421 *(u32 *)((void *)node + lru->hash_offset) = hash;
422 node->ref = 0;
423 __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
424 }
425
426 raw_spin_unlock_irqrestore(&l->lock, flags);
427
428 return node;
429 }
430
431 static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
432 u32 hash)
433 {
434 struct bpf_lru_locallist *loc_l, *steal_loc_l;
435 struct bpf_common_lru *clru = &lru->common_lru;
436 struct bpf_lru_node *node;
437 int steal, first_steal;
438 unsigned long flags;
439 int cpu = raw_smp_processor_id();
440
441 loc_l = per_cpu_ptr(clru->local_list, cpu);
442
443 raw_spin_lock_irqsave(&loc_l->lock, flags);
444
445 node = __local_list_pop_free(loc_l);
446 if (!node) {
447 bpf_lru_list_pop_free_to_local(lru, loc_l);
448 node = __local_list_pop_free(loc_l);
449 }
450
451 if (node)
452 __local_list_add_pending(lru, loc_l, cpu, node, hash);
453
454 raw_spin_unlock_irqrestore(&loc_l->lock, flags);
455
456 if (node)
457 return node;
458
459
460
461
462
463
464
465
466
467 first_steal = loc_l->next_steal;
468 steal = first_steal;
469 do {
470 steal_loc_l = per_cpu_ptr(clru->local_list, steal);
471
472 raw_spin_lock_irqsave(&steal_loc_l->lock, flags);
473
474 node = __local_list_pop_free(steal_loc_l);
475 if (!node)
476 node = __local_list_pop_pending(lru, steal_loc_l);
477
478 raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags);
479
480 steal = get_next_cpu(steal);
481 } while (!node && steal != first_steal);
482
483 loc_l->next_steal = steal;
484
485 if (node) {
486 raw_spin_lock_irqsave(&loc_l->lock, flags);
487 __local_list_add_pending(lru, loc_l, cpu, node, hash);
488 raw_spin_unlock_irqrestore(&loc_l->lock, flags);
489 }
490
491 return node;
492 }
493
494 struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash)
495 {
496 if (lru->percpu)
497 return bpf_percpu_lru_pop_free(lru, hash);
498 else
499 return bpf_common_lru_pop_free(lru, hash);
500 }
501
502 static void bpf_common_lru_push_free(struct bpf_lru *lru,
503 struct bpf_lru_node *node)
504 {
505 unsigned long flags;
506
507 if (WARN_ON_ONCE(node->type == BPF_LRU_LIST_T_FREE) ||
508 WARN_ON_ONCE(node->type == BPF_LRU_LOCAL_LIST_T_FREE))
509 return;
510
511 if (node->type == BPF_LRU_LOCAL_LIST_T_PENDING) {
512 struct bpf_lru_locallist *loc_l;
513
514 loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu);
515
516 raw_spin_lock_irqsave(&loc_l->lock, flags);
517
518 if (unlikely(node->type != BPF_LRU_LOCAL_LIST_T_PENDING)) {
519 raw_spin_unlock_irqrestore(&loc_l->lock, flags);
520 goto check_lru_list;
521 }
522
523 node->type = BPF_LRU_LOCAL_LIST_T_FREE;
524 node->ref = 0;
525 list_move(&node->list, local_free_list(loc_l));
526
527 raw_spin_unlock_irqrestore(&loc_l->lock, flags);
528 return;
529 }
530
531 check_lru_list:
532 bpf_lru_list_push_free(&lru->common_lru.lru_list, node);
533 }
534
535 static void bpf_percpu_lru_push_free(struct bpf_lru *lru,
536 struct bpf_lru_node *node)
537 {
538 struct bpf_lru_list *l;
539 unsigned long flags;
540
541 l = per_cpu_ptr(lru->percpu_lru, node->cpu);
542
543 raw_spin_lock_irqsave(&l->lock, flags);
544
545 __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE);
546
547 raw_spin_unlock_irqrestore(&l->lock, flags);
548 }
549
550 void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node)
551 {
552 if (lru->percpu)
553 bpf_percpu_lru_push_free(lru, node);
554 else
555 bpf_common_lru_push_free(lru, node);
556 }
557
558 static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
559 u32 node_offset, u32 elem_size,
560 u32 nr_elems)
561 {
562 struct bpf_lru_list *l = &lru->common_lru.lru_list;
563 u32 i;
564
565 for (i = 0; i < nr_elems; i++) {
566 struct bpf_lru_node *node;
567
568 node = (struct bpf_lru_node *)(buf + node_offset);
569 node->type = BPF_LRU_LIST_T_FREE;
570 node->ref = 0;
571 list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
572 buf += elem_size;
573 }
574 }
575
576 static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf,
577 u32 node_offset, u32 elem_size,
578 u32 nr_elems)
579 {
580 u32 i, pcpu_entries;
581 int cpu;
582 struct bpf_lru_list *l;
583
584 pcpu_entries = nr_elems / num_possible_cpus();
585
586 i = 0;
587
588 for_each_possible_cpu(cpu) {
589 struct bpf_lru_node *node;
590
591 l = per_cpu_ptr(lru->percpu_lru, cpu);
592 again:
593 node = (struct bpf_lru_node *)(buf + node_offset);
594 node->cpu = cpu;
595 node->type = BPF_LRU_LIST_T_FREE;
596 node->ref = 0;
597 list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
598 i++;
599 buf += elem_size;
600 if (i == nr_elems)
601 break;
602 if (i % pcpu_entries)
603 goto again;
604 }
605 }
606
607 void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
608 u32 elem_size, u32 nr_elems)
609 {
610 if (lru->percpu)
611 bpf_percpu_lru_populate(lru, buf, node_offset, elem_size,
612 nr_elems);
613 else
614 bpf_common_lru_populate(lru, buf, node_offset, elem_size,
615 nr_elems);
616 }
617
618 static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu)
619 {
620 int i;
621
622 for (i = 0; i < NR_BPF_LRU_LOCAL_LIST_T; i++)
623 INIT_LIST_HEAD(&loc_l->lists[i]);
624
625 loc_l->next_steal = cpu;
626
627 raw_spin_lock_init(&loc_l->lock);
628 }
629
630 static void bpf_lru_list_init(struct bpf_lru_list *l)
631 {
632 int i;
633
634 for (i = 0; i < NR_BPF_LRU_LIST_T; i++)
635 INIT_LIST_HEAD(&l->lists[i]);
636
637 for (i = 0; i < NR_BPF_LRU_LIST_COUNT; i++)
638 l->counts[i] = 0;
639
640 l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE];
641
642 raw_spin_lock_init(&l->lock);
643 }
644
645 int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
646 del_from_htab_func del_from_htab, void *del_arg)
647 {
648 int cpu;
649
650 if (percpu) {
651 lru->percpu_lru = alloc_percpu(struct bpf_lru_list);
652 if (!lru->percpu_lru)
653 return -ENOMEM;
654
655 for_each_possible_cpu(cpu) {
656 struct bpf_lru_list *l;
657
658 l = per_cpu_ptr(lru->percpu_lru, cpu);
659 bpf_lru_list_init(l);
660 }
661 lru->nr_scans = PERCPU_NR_SCANS;
662 } else {
663 struct bpf_common_lru *clru = &lru->common_lru;
664
665 clru->local_list = alloc_percpu(struct bpf_lru_locallist);
666 if (!clru->local_list)
667 return -ENOMEM;
668
669 for_each_possible_cpu(cpu) {
670 struct bpf_lru_locallist *loc_l;
671
672 loc_l = per_cpu_ptr(clru->local_list, cpu);
673 bpf_lru_locallist_init(loc_l, cpu);
674 }
675
676 bpf_lru_list_init(&clru->lru_list);
677 lru->nr_scans = LOCAL_NR_SCANS;
678 }
679
680 lru->percpu = percpu;
681 lru->del_from_htab = del_from_htab;
682 lru->del_arg = del_arg;
683 lru->hash_offset = hash_offset;
684
685 return 0;
686 }
687
688 void bpf_lru_destroy(struct bpf_lru *lru)
689 {
690 if (lru->percpu)
691 free_percpu(lru->percpu_lru);
692 else
693 free_percpu(lru->common_lru.local_list);
694 }